library(MASS)
library(klaR)
jobclassifications <- read.csv("C:/Users/01438475/Google Drive/UCTcourses/STA3022F/Practicals/prac6_Ch8_Discriminant/jobclassifications.csv")
attach(jobclassifications)
names(jobclassifications)
## [1] "outdoor" "social" "conservative" "job" "jobcat"
## [6] "jobcat2"
head(jobclassifications)
## outdoor social conservative job jobcat jobcat2
## 1 10 22 5 customer service 1 customerservice
## 2 14 17 6 customer service 1 customerservice
## 3 19 33 7 customer service 1 customerservice
## 4 14 29 12 customer service 1 customerservice
## 5 14 25 7 customer service 1 customerservice
## 6 20 25 12 customer service 1 customerservice
datatoWork=jobclassifications
dependentVariable=jobcat # jobcat is available directly because of attach() above
numberofCat=3 # number of categories in the dependent (Y) variable, jobcat
formulaAll=dependentVariable~outdoor+social+conservative
greedy.wilks(formulaAll,data=datatoWork, niveau = 0.15) # niveau: significance level of the approximate F-test used for variable selection
## Formula containing included variables:
##
## dependentVariable ~ social + outdoor + conservative
## <environment: 0x000000001d576c00>
##
##
## Values calculated in each step of the selection procedure:
##
## vars Wilks.lambda F.statistics.overall p.value.overall
## 1 social 0.6039814 79.00945 4.105260e-27
## 2 outdoor 0.4326715 62.43246 1.814539e-42
## 3 conservative 0.3639880 52.38173 1.634919e-49
## F.statistics.diff p.value.diff
## 1 79.00945 4.105260e-27
## 2 47.51225 0.000000e+00
## 3 22.54931 1.061537e-09
formulaStepwise=dependentVariable~outdoor+social+conservative # all three predictors survive the stepwise selection, so the formula is unchanged
fit <- lda(formulaStepwise,data=datatoWork, method="moment")
fit
## Call:
## lda(formulaStepwise, data = datatoWork, method = "moment")
##
## Prior probabilities of groups:
## 1 2 3
## 0.3483607 0.3811475 0.2704918
##
## Group means:
## outdoor social conservative
## 1 12.51765 24.22353 9.023529
## 2 18.53763 21.13978 10.139785
## 3 15.57576 15.45455 13.242424
##
## Coefficients of linear discriminants:
## LD1 LD2
## outdoor 0.09198065 -0.22501431
## social -0.19427415 -0.04978105
## conservative 0.15499199 0.08734288
##
## Proportion of trace:
## LD1 LD2
## 0.7712 0.2288
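As a quick check, the proportion of trace printed above can be reproduced from the singular values that lda() stores in the fitted object; this small sketch uses only the fit object created above.
fit$svd^2 / sum(fit$svd^2) # squared singular values, rescaled; should match 0.7712 and 0.2288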
fitPredict=predict(fit)
#head(fitPredict)
names(fitPredict)
## [1] "class" "posterior" "x"
LDscores=fitPredict$x
#head(LDscores)
LD1=LDscores[,1] # discriminant scores from the first discriminant function
LD2=LDscores[,2] # discriminant scores from the second discriminant function
plot(fit) # if you get a "figure margins too large" error, enlarge the plot window and rerun
plot(LD1, xlab="first linear discriminant", col=factor(dependentVariable))
legend("topleft", legend=levels(factor(dependentVariable)),
       text.col=seq_along(levels(factor(dependentVariable))))
plot(LD2, xlab="second linear discriminant", col=factor(dependentVariable))
legend("topleft", legend=levels(factor(dependentVariable)),
       text.col=seq_along(levels(factor(dependentVariable))))
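Plotting the two sets of discriminant scores against each other usually shows the group separation more clearly than the two index plots above; this sketch reuses the LD1, LD2 and dependentVariable objects already defined.
plot(LD1, LD2, col=factor(dependentVariable),
     xlab="first linear discriminant", ylab="second linear discriminant")
legend("topleft", legend=levels(factor(dependentVariable)),
       text.col=seq_along(levels(factor(dependentVariable))))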
categories=row.names(as.matrix(table(dependentVariable)))
categories
## [1] "1" "2" "3"
# group centroids: mean LD1 and LD2 score within each job category
centroid1G1=sum(LDscores[,1]*(dependentVariable==categories[1]))/sum(dependentVariable==categories[1])
centroid2G1=sum(LDscores[,2]*(dependentVariable==categories[1]))/sum(dependentVariable==categories[1])
centroid1G2=sum(LDscores[,1]*(dependentVariable==categories[2]))/sum(dependentVariable==categories[2])
centroid2G2=sum(LDscores[,2]*(dependentVariable==categories[2]))/sum(dependentVariable==categories[2])
centroid1G3=sum(LDscores[,1]*(dependentVariable==categories[3]))/sum(dependentVariable==categories[3])
centroid2G3=sum(LDscores[,2]*(dependentVariable==categories[3]))/sum(dependentVariable==categories[3])
centroids=matrix(c(centroid1G1,centroid1G2,centroid1G3,centroid2G1,centroid2G2,centroid2G3),
                 nrow=3, ncol=2,
                 dimnames=list(c("CustomerService","Mechanic","Dispatch"),
                               c("Centroid1","Centroid2")))
centroids
## Centroid1 Centroid2
## CustomerService -1.2191002 0.3890039
## Mechanic 0.1067246 -0.7145704
## Dispatch 1.4196686 0.5059049
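The same centroids can be computed in one step by averaging the discriminant scores within each group; shown here only as a cross-check of the matrix built manually above.
aggregate(as.data.frame(LDscores), by=list(group=dependentVariable), FUN=mean)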
# squared Euclidean distances between the group centroids in the discriminant space
distanceG1G2=(centroids[1,1]-centroids[2,1])^2+(centroids[1,2]-centroids[2,2])^2
distanceG1G3=(centroids[1,1]-centroids[3,1])^2+(centroids[1,2]-centroids[3,2])^2
distanceG2G3=(centroids[2,1]-centroids[3,1])^2+(centroids[2,2]-centroids[3,2])^2
cbind(distanceG1G2,distanceG1G3,distanceG2G3)
## distanceG1G2 distanceG1G3 distanceG2G3
## [1,] 2.975688 6.976766 3.213382
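As a cross-check, the same pairwise squared Euclidean distances can be read off a full distance matrix computed with dist().
as.matrix(dist(centroids))^2 # squared Euclidean distances between every pair of centroids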
table(jobcat)
## jobcat
## 1 2 3
## 85 93 66
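The prior probabilities reported by lda() above are simply these group proportions; a quick check:
prop.table(table(jobcat)) # should match the priors 0.348, 0.381, 0.270 printed by lda()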
misclassification.rate=function(tab){
  num1=sum(diag(tab))  # correctly classified observations (diagonal of the table)
  denom1=sum(tab)      # total number of observations
  signif(1-num1/denom1,3)
}
classificationTable=table(dependentVariable,fitPredict$class)
classificationTable
##
## dependentVariable 1 2 3
## 1 68 13 4
## 2 16 67 10
## 3 3 13 50
misRate=misclassification.rate(classificationTable)
misRate
## [1] 0.242
hitRate=1-misRate
hitRate
## [1] 0.758
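The hit rate above is computed on the same observations used to fit the model, so it is optimistic. MASS::lda() can do leave-one-out cross-validation with CV = TRUE; this sketch recomputes the classification table from the cross-validated class predictions (the resulting hit rate will typically be a little lower).
fitCV <- lda(formulaStepwise, data=datatoWork, method="moment", CV=TRUE)
cvTable <- table(dependentVariable, fitCV$class)
1 - misclassification.rate(cvTable) # cross-validated hit rate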
independentVars<-as.matrix(datatoWork[,c(1,2,3)]) # matrix of the three predictor columns
head(independentVars) # the full matrix has 244 rows; only the first few are shown
## outdoor social conservative
## [1,] 10 22 5
## [2,] 14 17 6
## [3,] 19 33 7
## [4,] 14 29 12
## [5,] 14 25 7
## [6,] 20 25 12
manova1<-manova(independentVars ~ dependentVariable)
wilks.test<-summary(manova1,test="Wilks")
wilks.test
## Df Wilks approx F num Df den Df Pr(>F)
## dependentVariable 1 0.48065 86.441 3 240 < 2.2e-16 ***
## Residuals 242
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
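Note that jobcat is stored as a numeric variable, so manova() above treats it as a single continuous regressor (1 Df); this is why its Wilks' lambda (0.481) differs from the 0.364 reported by greedy.wilks(), which uses the left-hand side of the formula as a grouping variable. To test the separation between the three job categories directly, the grouping variable can be converted to a factor, as sketched below (output not shown).
manova2 <- manova(independentVars ~ factor(dependentVariable))
summary(manova2, test="Wilks") # the grouping factor now has 2 Df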