Tutorial 2 - Categorical Variables

Categorical Predictors

Multicategorical Variables

Example

# Load haven, which supplies read_sav() for importing SPSS .sav files.
library(haven)
## Warning: package 'haven' was built under R version 4.1.2
# Read the data set; the path is relative to the current working directory.
milit <- read_sav("milit.sav")
# Inspect the structure: 215 rows, 3 columns (ID, group, militarization).
# `group` is a labelled numeric coded 0-3; its value labels are listed below.
str(milit)
## tibble [215 x 3] (S3: tbl_df/tbl/data.frame)
##  $ ID            : num [1:215] 1 2 3 4 5 6 7 8 9 10 ...
##   ..- attr(*, "format.spss")= chr "F8.2"
##  $ group         : dbl+lbl [1:215] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
##    ..@ format.spss: chr "F8.2"
##    ..@ labels     : Named num [1:4] 0 1 2 3
##    .. ..- attr(*, "names")= chr [1:4] "mental health crisis team" "patrol" "K9" "tactical team"
##  $ militarization: num [1:215] 14.5 14.1 10.1 10.8 12.8 ...
##   ..- attr(*, "format.spss")= chr "F8.2"

Prepare our Dataset

# Recode the labelled numeric `group` (codes 0-3) as a factor with short
# labels, assigned in code order: 0 = MH, 1 = Patrol, 2 = K9, 3 = Tac.
milit$groupf <- factor(
  milit$group,
  levels = 0:3,
  labels = c("MH", "Patrol", "K9", "Tac")
)
# Confirm the result: four levels and class "factor".
attributes(milit$groupf)
## $levels
## [1] "MH"     "Patrol" "K9"     "Tac"   
## 
## $class
## [1] "factor"

Examine the means for our dependent variable at different levels of our independent variable

# Mean militarization score within each level of the group factor.
tapply(milit$militarization, milit$groupf, mean)
##       MH   Patrol       K9      Tac 
## 11.54941 20.39585 33.54770 41.96147

Indicator (Dummy) Coding

# Show the treatment (dummy) coding matrix for a 4-level factor: rows are
# levels 1-4, columns are the three indicator variables, and level 1 is
# coded 0 on every indicator.
contr.treatment(4)
##   2 3 4
## 1 0 0 0
## 2 1 0 0
## 3 0 1 0
## 4 0 0 1

Indicator (Dummy) Coding

# Attach treatment (dummy) contrasts to the factor; level 1 (MH) is coded 0
# on every indicator.
contrasts(milit$groupf) <- contr.treatment(4)
# Regress militarization on group. In the output below the intercept equals
# the MH mean and each slope equals a group mean minus the MH mean.
indicator <- lm(militarization ~ groupf, data = milit)
indicator
## 
## Call:
## lm(formula = militarization ~ groupf, data = milit)
## 
## Coefficients:
## (Intercept)      groupf2      groupf3      groupf4  
##      11.549        8.846       21.998       30.412

Check for Yourself Exercise

# Exercise inputs: the group means ...
tapply(milit$militarization, milit$groupf, mean)
##       MH   Patrol       K9      Tac 
## 11.54941 20.39585 33.54770 41.96147
# ... and the dummy coding matrix, from which the regression coefficients
# can be worked out by hand.
contr.treatment(4)
##   2 3 4
## 1 0 0 0
## 2 1 0 0
## 3 0 1 0
## 4 0 0 1

Check for Yourself Exercise Answers

# Each dummy-coded slope is a group mean minus the MH (reference) mean.
20.3959 - 11.5494  # Patrol - MH
## [1] 8.8465
33.5477 - 11.5494  # K9 - MH
## [1] 21.9983
41.9615 - 11.5494  # Tac - MH
## [1] 30.4121
# Fitted dummy-coded model, for comparison:
## 
## Call:
## lm(formula = militarization ~ groupf, data = milit)
## 
## Coefficients:
## (Intercept)      groupf2      groupf3      groupf4  
##      11.549        8.846       21.998       30.412

Effect Coding

# Show the sum-to-zero (effect) coding matrix for a 4-level factor: levels
# 1-3 each get their own code, and level 4 is coded -1 on all three codes.
contr.sum(4)
##   [,1] [,2] [,3]
## 1    1    0    0
## 2    0    1    0
## 3    0    0    1
## 4   -1   -1   -1

Effect Coding

# Attach sum-to-zero (effect) contrasts to the factor.
contrasts(milit$groupf) <- contr.sum(4)
# Refit the same model. In the output below the intercept equals the
# unweighted mean of the four group means, and each slope equals a group
# mean minus that grand mean.
effect <- lm(militarization ~ groupf, data = milit)
effect
## 
## Call:
## lm(formula = militarization ~ groupf, data = milit)
## 
## Coefficients:
## (Intercept)      groupf1      groupf2      groupf3  
##      26.864      -15.314       -6.468        6.684

Check for Yourself Exercise

# Exercise inputs: the group means ...
tapply(milit$militarization, milit$groupf, mean)
##       MH   Patrol       K9      Tac 
## 11.54941 20.39585 33.54770 41.96147
# ... and the effect coding matrix, from which the regression coefficients
# can be worked out by hand.
contr.sum(4)
##   [,1] [,2] [,3]
## 1    1    0    0
## 2    0    1    0
## 3    0    0    1
## 4   -1   -1   -1

Check for Yourself

# Intercept: the unweighted mean of the four group means.
(11.5494 + 20.39585 + 33.54770 + 41.96147) / 4
## [1] 26.8636
# Each effect-coded slope is a group mean minus that grand mean.
11.5494 - 26.8636   # MH
## [1] -15.3142
20.39585 - 26.8636  # Patrol
## [1] -6.46775
33.54770 - 26.8636  # K9
## [1] 6.6841
# Fitted effect-coded model, for comparison:
## 
## Call:
## lm(formula = militarization ~ groupf, data = milit)
## 
## Coefficients:
## (Intercept)      groupf1      groupf2      groupf3  
##      26.864      -15.314       -6.468        6.684

Sequential Coding

Sequential Coding

- With g groups, each code c_j is set to 1 for cases in a group ordinally higher than group j on the variable defining the groups; otherwise c_j is set to 0. Each coefficient then estimates the difference between the means of two adjacent groups.

# Sequential coding matrix (4 groups x 3 codes). matrix() fills column by
# column, so each line of values below is one code: code j is 1 for every
# group ordinally above group j and 0 otherwise.
sequentialcodes <- matrix(
  c(
    0, 1, 1, 1,
    0, 0, 1, 1,
    0, 0, 0, 1
  ),
  nrow = 4,
  ncol = 3
)
sequentialcodes
##      [,1] [,2] [,3]
## [1,]    0    0    0
## [2,]    1    0    0
## [3,]    1    1    0
## [4,]    1    1    1

Sequential Coding

# Attach the hand-built sequential codes as the factor's contrasts.
contrasts(milit$groupf) <- sequentialcodes
# Refit the same model. In the output below the intercept equals the MH mean
# and each slope equals the difference between adjacent group means.
sequential <- lm(militarization ~ groupf, data = milit)
sequential
## 
## Call:
## lm(formula = militarization ~ groupf, data = milit)
## 
## Coefficients:
## (Intercept)      groupf1      groupf2      groupf3  
##      11.549        8.846       13.152        8.414

Check For Yourself

# Group means used in the hand calculations that follow.
tapply(milit$militarization, milit$groupf, mean)
##       MH   Patrol       K9      Tac 
## 11.54941 20.39585 33.54770 41.96147
# Each sequential-coded slope is the difference between adjacent group means.
# The MH mean is 11.5494 (not 11.5417), so the first difference reproduces
# the fitted groupf1 coefficient of 8.846.
20.3959 - 11.5494  # Patrol - MH
## [1] 8.8465
33.5477 - 20.3959  # K9 - Patrol
## [1] 13.1518
41.9615 - 33.5477  # Tac - K9
## [1] 8.4138
# Fitted sequential-coded model, for comparison:
## 
## Call:
## lm(formula = militarization ~ groupf, data = milit)
## 
## Coefficients:
## (Intercept)      groupf1      groupf2      groupf3  
##      11.549        8.846       13.152        8.414

Helmert Coding

Helmert Coding

# Helmert coding matrix (4 groups x 3 codes). matrix() fills column by
# column, so each line of values below is one code: code j contrasts group j
# with all ordinally higher groups, and every column sums to zero.
my.helmert <- matrix(
  c(
    3/4, -1/4, -1/4, -1/4,
    0,    2/3, -1/3, -1/3,
    0,    0,    1/2, -1/2
  ),
  nrow = 4,
  ncol = 3
)
my.helmert
##       [,1]       [,2] [,3]
## [1,]  0.75  0.0000000  0.0
## [2,] -0.25  0.6666667  0.0
## [3,] -0.25 -0.3333333  0.5
## [4,] -0.25 -0.3333333 -0.5

Helmert Coding

# Attach the hand-built Helmert codes as the factor's contrasts.
contrasts(milit$groupf) <- my.helmert
# Refit the same model. In the output below the intercept equals the
# unweighted mean of the group means, and each slope equals a group mean
# minus the mean of the ordinally higher groups' means.
helmert <- lm(militarization ~ groupf, data = milit)
helmert
## 
## Call:
## lm(formula = militarization ~ groupf, data = milit)
## 
## Coefficients:
## (Intercept)      groupf1      groupf2      groupf3  
##      26.864      -20.419      -17.359       -8.414

Check For Yourself

# Intercept: the unweighted mean of the four group means.
(11.5494 + 20.39585 + 33.54770 + 41.96147) / 4
## [1] 26.8636
# groupf1: MH mean minus the mean of the Patrol, K9 and Tac means.
(20.39585 + 33.54770 + 41.96147) / 3
## [1] 31.96834
11.5494 - 31.9683
## [1] -20.4189
# groupf2: Patrol mean minus the mean of the K9 and Tac means.
(33.54770 + 41.96147) / 2
## [1] 37.75458
20.3959 - 37.7546
## [1] -17.3587
# groupf3: K9 mean minus the Tac mean.
33.54770 - 41.9614
## [1] -8.4137
# Fitted Helmert-coded model, for comparison:
## 
## Call:
## lm(formula = militarization ~ groupf, data = milit)
## 
## Coefficients:
## (Intercept)      groupf1      groupf2      groupf3  
##      26.864      -20.419      -17.359       -8.414