Discriminant Analysis by Dr. Sebnem Er

lda() function for DA

library(MASS)
library(klaR)
## Warning: package 'klaR' was built under R version 3.6.3

Read the data

jobclassifications <- read.csv("C:/Users/01438475/Google Drive/UCTcourses/STA3022F/Practicals/prac6_Ch8_Discriminant/jobclassifications.csv")
attach(jobclassifications)
names(jobclassifications)
## [1] "outdoor"      "social"       "conservative" "job"          "jobcat"      
## [6] "jobcat2"
head(jobclassifications)
##   outdoor social conservative              job jobcat         jobcat2
## 1      10     22            5 customer service      1 customerservice
## 2      14     17            6 customer service      1 customerservice
## 3      19     33            7 customer service      1 customerservice
## 4      14     29           12 customer service      1 customerservice
## 5      14     25            7 customer service      1 customerservice
## 6      20     25           12 customer service      1 customerservice

Variable and Data Description

datatoWork=jobclassifications
dependentVariable=jobcat
numberofCat=3 # How many categories are there in your Y (dependent) variable - jobcat variable

Step1) Stepwise Estimation: The F test of Wilks’ lambda shows which variables’ contributions are significant.

formulaAll=dependentVariable~outdoor+social+conservative
  
greedy.wilks(formulaAll,data=datatoWork, niveau = 0.15) 
## Formula containing included variables: 
## 
## dependentVariable ~ social + outdoor + conservative
## <environment: 0x000000001d576c00>
## 
## 
## Values calculated in each step of the selection procedure: 
## 
##           vars Wilks.lambda F.statistics.overall p.value.overall
## 1       social    0.6039814             79.00945    4.105260e-27
## 2      outdoor    0.4326715             62.43246    1.814539e-42
## 3 conservative    0.3639880             52.38173    1.634919e-49
##   F.statistics.diff p.value.diff
## 1          79.00945 4.105260e-27
## 2          47.51225 0.000000e+00
## 3          22.54931 1.061537e-09
formulaStepwise=dependentVariable~outdoor+social+conservative

fit <- lda(formulaStepwise,data=datatoWork, method="moment")
fit
## Call:
## lda(formulaStepwise, data = datatoWork, method = "moment")
## 
## Prior probabilities of groups:
##         1         2         3 
## 0.3483607 0.3811475 0.2704918 
## 
## Group means:
##    outdoor   social conservative
## 1 12.51765 24.22353     9.023529
## 2 18.53763 21.13978    10.139785
## 3 15.57576 15.45455    13.242424
## 
## Coefficients of linear discriminants:
##                      LD1         LD2
## outdoor       0.09198065 -0.22501431
## social       -0.19427415 -0.04978105
## conservative  0.15499199  0.08734288
## 
## Proportion of trace:
##    LD1    LD2 
## 0.7712 0.2288

Step2) Discriminant Score Estimates for Each Observation

fitPredict=predict(fit)
#head(fitPredict)
names(fitPredict)
## [1] "class"     "posterior" "x"
LDscores=fitPredict$x
#head(LDscores)
LD1=LDscores[,1] # Discriminant Scores Obtained from the first Discriminant Function
LD2=LDscores[,2] # Discriminant Scores Obtained from the second Discriminant Function

Plot the discriminant scores

plot(fit) #if you get an error of margin, do enlarge the plot window, it will work

plot(LD1,xlab="first linear discriminant",col=factor(dependentVariable))
legend("topleft", legend=levels(factor(dependentVariable)), 
       text.col=seq_along(levels(factor(dependentVariable))))

plot(LD2,xlab="second linear discriminant",col=factor(dependentVariable))
legend("topleft", legend=levels(factor(dependentVariable)), 
       text.col=seq_along(levels(factor(dependentVariable))))

Step3-a) Obtain the centroids and test if the centroids are significantly different from zero.

categories=row.names(as.matrix(table(dependentVariable)))
categories
## [1] "1" "2" "3"
centroid1G1=sum(LDscores[,1]*(dependentVariable==categories[1]))/sum(dependentVariable==categories[1])
centroid2G1=sum(LDscores[,2]*(dependentVariable==categories[1]))/sum(dependentVariable==categories[1])
centroid1G2=sum(LDscores[,1]*(dependentVariable==categories[2]))/sum(dependentVariable==categories[2])
centroid2G2=sum(LDscores[,2]*(dependentVariable==categories[2]))/sum(dependentVariable==categories[2])
centroid1G3=sum(LDscores[,1]*(dependentVariable==categories[3]))/sum(dependentVariable==categories[3])
centroid2G3=sum(LDscores[,2]*(dependentVariable==categories[3]))/sum(dependentVariable==categories[3])

centroids=matrix(c(centroid1G1,centroid1G2,centroid1G3,centroid2G1,centroid2G2,centroid2G3),
                 nrow=3,ncol=2,dimnames=list(c("CustomerService","Mechanic","Dispatch"),
                                             c("Centroid1","Centroid2")))
centroids
##                  Centroid1  Centroid2
## CustomerService -1.2191002  0.3890039
## Mechanic         0.1067246 -0.7145704
## Dispatch         1.4196686  0.5059049

Differences between the centroids

distanceG1G2=(centroids[1,1]-centroids[2,1])^2+(centroids[1,2]-centroids[2,2])^2
distanceG1G3=(centroids[1,1]-centroids[3,1])^2+(centroids[1,2]-centroids[3,2])^2
distanceG2G3=(centroids[2,1]-centroids[3,1])^2+(centroids[2,2]-centroids[3,2])^2

cbind(distanceG1G2,distanceG1G3,distanceG2G3)
##      distanceG1G2 distanceG1G3 distanceG2G3
## [1,]     2.975688     6.976766     3.213382
table(jobcat)
## jobcat
##  1  2  3 
## 85 93 66

Test the differences between the centroids

This has to be done using the F-test formula (check your lecture notes page164)

Step3-b) Calculate the Hit Rate

misclassification.rate=function(tab){
  num1=sum(diag(tab))
  denom1=sum(tab)
  signif(1-num1/denom1,3)
}

classificationTable=table(dependentVariable,fitPredict$class)
classificationTable
##                  
## dependentVariable  1  2  3
##                 1 68 13  4
##                 2 16 67 10
##                 3  3 13 50
misRate=misclassification.rate(classificationTable)
misRate
## [1] 0.242
hitRate=1-misRate
hitRate
## [1] 0.758

Step3-c) Overall Test of the Discriminant Function

independentVars<-as.matrix(datatoWork[,c(1,2,3)])
independentVars
##        outdoor social conservative
##   [1,]      10     22            5
##   [2,]      14     17            6
##   [3,]      19     33            7
##   [4,]      14     29           12
##   [5,]      14     25            7
##   [6,]      20     25           12
##   [7,]       6     18            4
##   [8,]      13     27            7
##   [9,]      18     31            9
##  [10,]      16     35           13
##  [11,]      17     25            8
##  [12,]      10     29           11
##  [13,]      17     25            7
##  [14,]      10     22           13
##  [15,]      10     31           13
##  [16,]      18     25            5
##  [17,]       0     27           11
##  [18,]      10     24           12
##  [19,]      15     23           10
##  [20,]       8     29           14
##  [21,]       6     27           11
##  [22,]      10     17            8
##  [23,]       1     30            6
##  [24,]      14     29            7
##  [25,]      13     21           11
##  [26,]      21     31           11
##  [27,]      12     26            9
##  [28,]      12     22            9
##  [29,]       5     25            7
##  [30,]      10     24            5
##  [31,]       3     20           14
##  [32,]       6     25           12
##  [33,]      11     27           10
##  [34,]      13     21           14
##  [35,]      11     23            5
##  [36,]       8     18            8
##  [37,]       5     17            9
##  [38,]      11     22           11
##  [39,]      14     22           11
##  [40,]      22     22            6
##  [41,]      16     28            6
##  [42,]      12     25            8
##  [43,]      12     25            7
##  [44,]      15     21            4
##  [45,]      11     28            8
##  [46,]      11     20            9
##  [47,]      15     19            9
##  [48,]      15     24            7
##  [49,]      15     21           10
##  [50,]      17     26            7
##  [51,]      12     28           13
##  [52,]       7     28           12
##  [53,]      14     12            6
##  [54,]      22     24            6
##  [55,]      22     27           12
##  [56,]      18     30            9
##  [57,]      16     18            5
##  [58,]      12     23            4
##  [59,]      16     22            2
##  [60,]      15     26            9
##  [61,]       7     13            7
##  [62,]       6     18            6
##  [63,]       9     24            6
##  [64,]       9     20           12
##  [65,]      20     28            8
##  [66,]       5     22           15
##  [67,]      14     26           17
##  [68,]       8     28           12
##  [69,]      14     22            9
##  [70,]      15     26            4
##  [71,]      15     25           10
##  [72,]      14     27            6
##  [73,]      15     25           11
##  [74,]      11     26            9
##  [75,]      10     28            5
##  [76,]       7     22           10
##  [77,]      11     15           12
##  [78,]      14     25           15
##  [79,]      18     28            7
##  [80,]      14     29            8
##  [81,]      17     20            6
##  [82,]      13     25           14
##  [83,]       9     21           12
##  [84,]      13     26           13
##  [85,]      16     24           10
##  [86,]      20     27            6
##  [87,]      21     15           10
##  [88,]      15     27           12
##  [89,]      15     29            8
##  [90,]      11     25           11
##  [91,]      24      9           17
##  [92,]      18     21           13
##  [93,]      14     18            4
##  [94,]      13     22           12
##  [95,]      17     21            9
##  [96,]      16     28           13
##  [97,]      15     22           12
##  [98,]      24     20           15
##  [99,]      14     19           13
## [100,]      14     28            1
## [101,]      18     17           11
## [102,]      14     24            7
## [103,]      12     16           10
## [104,]      16     21           10
## [105,]      18     19            9
## [106,]      19     26            7
## [107,]      13     20           10
## [108,]      28     16           10
## [109,]      17     19           11
## [110,]      24     14            7
## [111,]      19     23           12
## [112,]      22     12            8
## [113,]      22     21           11
## [114,]      21     19            9
## [115,]      18     24           13
## [116,]      23     27           11
## [117,]      20     23           12
## [118,]      19     13            7
## [119,]      17     28           13
## [120,]      20     24            5
## [121,]      21     23           11
## [122,]      17     21           15
## [123,]      11     25           12
## [124,]      14     19           14
## [125,]      18     24            5
## [126,]      13     14            7
## [127,]      22     18           16
## [128,]      25     17           13
## [129,]      19     25           13
## [130,]      20     20            9
## [131,]      21     25           11
## [132,]      17     24           11
## [133,]      18     26           10
## [134,]      21     29           11
## [135,]      17     21           12
## [136,]      17     19           12
## [137,]      17     16            6
## [138,]      16     22            5
## [139,]      22     19           10
## [140,]      19     23           12
## [141,]      16     23            9
## [142,]      18     27           11
## [143,]      21     24           12
## [144,]      15     22           13
## [145,]      19     26           12
## [146,]      14     17           11
## [147,]      15     23            7
## [148,]      23     20           16
## [149,]      22     26           15
## [150,]      13     16           11
## [151,]      25     29           11
## [152,]      23     24            7
## [153,]      17     29            9
## [154,]      21     19            7
## [155,]      15     13            6
## [156,]      19     27           14
## [157,]      22     24           14
## [158,]      17     18            8
## [159,]      21     19            8
## [160,]      24     18           13
## [161,]      21     12            9
## [162,]      15     17            8
## [163,]      24     22           14
## [164,]      19     19            7
## [165,]      23     16           10
## [166,]      21     29           12
## [167,]      20     19           11
## [168,]      18     28            0
## [169,]      23     21           16
## [170,]      17     17            8
## [171,]      17     24            5
## [172,]      17     18           15
## [173,]      17     23           10
## [174,]      19     15           10
## [175,]      17     20            8
## [176,]      25     20            8
## [177,]      16     19            8
## [178,]      19     16            6
## [179,]      19     19           16
## [180,]      17     17           12
## [181,]       8     17           14
## [182,]      13     20           16
## [183,]      14     18            4
## [184,]      17     12           13
## [185,]      17     12           17
## [186,]      14     21           16
## [187,]      19     18           12
## [188,]      18     16           15
## [189,]      15     14           17
## [190,]      20     15            7
## [191,]      24     20           13
## [192,]      16     16           17
## [193,]      17     15           10
## [194,]      17     10           12
## [195,]      11     16           11
## [196,]      15     18           14
## [197,]      20     19           16
## [198,]      14     22           16
## [199,]      13     15           18
## [200,]      16     14           13
## [201,]      12     12            6
## [202,]      17     17           19
## [203,]      10      8           16
## [204,]      11     17           20
## [205,]      13     16            7
## [206,]      19     15           13
## [207,]      15     11           13
## [208,]      17     11           10
## [209,]      15     10           13
## [210,]      19     14           12
## [211,]      19     14           15
## [212,]       4     12           11
## [213,]      13     12           15
## [214,]      20     13           19
## [215,]      14     18           14
## [216,]      10     12            9
## [217,]      11     12           19
## [218,]       8     20            8
## [219,]      14     16            7
## [220,]      18     20           15
## [221,]      19      7           13
## [222,]      21     13           11
## [223,]      14     26           15
## [224,]      25     16           12
## [225,]      18     11           19
## [226,]      14     16            6
## [227,]      13     20           18
## [228,]      20     16           14
## [229,]      12     14            8
## [230,]      16     19           12
## [231,]      21     15            7
## [232,]      18     23           15
## [233,]      19     11           13
## [234,]      17     18            9
## [235,]       4     10           15
## [236,]      17     17           14
## [237,]      14     13           12
## [238,]      15     16           14
## [239,]      20     13           18
## [240,]      20     14           18
## [241,]      16     22           12
## [242,]       9     13           16
## [243,]      15     13           13
## [244,]      18     20           10
manova1<-manova(independentVars ~ dependentVariable)
wilks.test<-summary(manova1,test="Wilks")
wilks.test
##                    Df   Wilks approx F num Df den Df    Pr(>F)    
## dependentVariable   1 0.48065   86.441      3    240 < 2.2e-16 ***
## Residuals         242                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1