#LDA on the HBAT dataset.
#Use discriminant analysis to identify the perceptions of HBAT that best distinguish firms in each geographic region.
#Given a customer's perceptions (independent variables), we want to classify the customer into one of the 2 regions (dependent variable).
#In future, the company can then determine from the perceptions alone whether a rating came from a customer inside or outside the USA.
setwd("C:/Program Files/R/R-3.4.1")
data <- read.csv("HBAT_100.csv", header = TRUE, stringsAsFactors = FALSE) #path is relative to the working directory set above
set.seed(1234) #For random sample split into train and test
#For the LDA, create a data frame holding only the variables needed:
#Region (x4) is the dependent variable (group 0: USA/North America, group 1: outside North America); the perception variables (x6-x18) are the independent variables.
ldadata <- subset(data,select = c(x4,x6:x18))
#Split into train and test, i.e. analysis and hold-out samples:
#60 observations for training and 40 for testing, sampled randomly.
training = sample(nrow(ldadata),60,replace = FALSE)
train = ldadata[training,]
test = ldadata[-training,]
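#Quick sanity check (optional, not part of the original analysis):
#both regions should appear in reasonable numbers in each sample.
table(train$x4)             #group counts in the analysis sample
prop.table(table(test$x4))  #group proportions in the hold-out sample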
#Tests of significance. Purpose: test for significant differences between the groups on the individual independent variables,
#i.e. examine GROUP DIFFERENCES.
#1) Difference between group means - select the variables with the largest differences.
#Descriptive stats
#requires the psych package: install.packages("psych") if it is missing
library(psych)
## Warning: package 'psych' was built under R version 3.4.3
describeBy(train,train$x4)
##
## Descriptive statistics by group
## group: 0
## vars n mean sd median trimmed mad min max range skew kurtosis
## x4 1 22 0.00 0.00 0.00 0.00 0.00 0.0 0.0 0.0 NaN NaN
## x6 2 22 8.80 0.80 8.70 8.85 0.82 6.7 10.0 3.3 -0.68 0.22
## x7 3 22 3.58 0.67 3.45 3.49 0.52 2.6 5.6 3.0 1.27 1.58
## x8 4 22 5.68 1.56 5.85 5.68 1.85 3.0 8.4 5.4 -0.05 -1.26
## x9 5 22 5.36 1.12 5.20 5.37 1.11 3.2 7.6 4.4 0.06 -0.82
## x10 6 22 3.94 1.00 3.95 3.93 1.19 2.3 5.6 3.3 0.07 -1.27
## x11 7 22 6.55 0.88 6.45 6.54 0.74 4.7 8.4 3.7 0.14 -0.53
## x12 8 22 4.70 0.91 4.75 4.73 0.82 2.9 6.6 3.7 -0.12 -0.28
## x13 9 22 5.72 1.29 5.60 5.68 1.41 3.7 8.5 4.8 0.35 -0.91
## x14 10 22 6.20 0.72 6.25 6.22 0.67 5.0 7.3 2.3 -0.24 -1.11
## x15 11 22 5.05 1.62 4.90 5.05 1.93 2.4 7.7 5.3 0.09 -1.27
## x16 12 22 4.16 0.84 4.30 4.18 1.04 2.7 5.4 2.7 -0.28 -1.37
## x17 13 22 3.66 0.59 3.80 3.69 0.59 2.6 4.5 1.9 -0.38 -1.10
## x18 14 22 3.75 0.64 3.90 3.80 0.59 2.5 4.6 2.1 -0.54 -0.91
## se
## x4 0.00
## x6 0.17
## x7 0.14
## x8 0.33
## x9 0.24
## x10 0.21
## x11 0.19
## x12 0.19
## x13 0.27
## x14 0.15
## x15 0.34
## x16 0.18
## x17 0.12
## x18 0.14
## --------------------------------------------------------
## group: 1
## vars n mean sd median trimmed mad min max range skew kurtosis
## x4 1 38 1.00 0.00 1.00 1.00 0.00 1.0 1.0 0.0 NaN NaN
## x6 2 38 7.26 1.37 7.10 7.20 1.48 5.0 9.9 4.9 0.41 -0.85
## x7 3 38 3.72 0.70 3.65 3.71 0.52 2.2 5.7 3.5 0.35 0.76
## x8 4 38 5.32 1.71 5.75 5.40 1.93 1.3 7.9 6.6 -0.49 -0.88
## x9 5 38 5.18 1.14 5.15 5.20 1.11 2.6 7.5 4.9 -0.13 -0.55
## x10 6 38 4.24 1.25 4.30 4.25 1.48 2.1 6.5 4.4 -0.08 -1.18
## x11 7 38 5.09 1.07 4.90 5.00 0.89 3.3 8.3 5.0 0.91 0.64
## x12 8 38 5.34 1.03 5.10 5.33 0.89 3.4 7.8 4.4 0.35 -0.47
## x13 9 38 7.67 1.20 7.85 7.71 1.33 4.5 9.9 5.4 -0.38 -0.31
## x14 10 38 6.01 0.88 6.05 6.00 0.89 4.3 8.1 3.8 0.07 -0.54
## x15 11 38 5.26 1.25 5.30 5.26 1.63 2.8 7.6 4.8 -0.02 -1.18
## x16 12 38 4.18 1.09 4.30 4.16 1.11 2.0 6.7 4.7 0.11 -0.34
## x17 13 38 5.04 1.13 5.05 5.05 1.11 3.0 7.3 4.3 -0.13 -0.85
## x18 14 38 3.75 0.66 3.70 3.75 0.74 2.4 5.2 2.8 -0.01 -0.69
## se
## x4 0.00
## x6 0.22
## x7 0.11
## x8 0.28
## x9 0.18
## x10 0.20
## x11 0.17
## x12 0.17
## x13 0.20
## x14 0.14
## x15 0.20
## x16 0.18
## x17 0.18
## x18 0.11
#Result: the variables with the largest differences between group means are x6, x11, x12, x13 and x17.
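#The same ranking can be computed directly rather than read off the
#describeBy output; a minimal sketch:
group_means <- aggregate(. ~ x4, data = train, FUN = mean)
mean_diffs  <- abs(group_means[2, -1] - group_means[1, -1])
sort(unlist(mean_diffs), decreasing = TRUE) #x13, x6, x11, x17, x12 lead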
#2) Test of equality of group means: one-way ANOVA.
#Tests whether the group means differ for each independent variable; the variables whose group means differ most significantly are the strongest discriminators.
anova(lm(x6 ~ x4, data = train)) #significant
## Analysis of Variance Table
##
## Response: x6
## Df Sum Sq Mean Sq F value Pr(>F)
## x4 1 32.715 32.715 22.752 1.284e-05 ***
## Residuals 58 83.398 1.438
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lm(x7 ~ x4, data = train))
## Analysis of Variance Table
##
## Response: x7
## Df Sum Sq Mean Sq F value Pr(>F)
## x4 1 0.26 0.26000 0.5498 0.4614
## Residuals 58 27.43 0.47293
anova(lm(x8 ~ x4, data = train))
## Analysis of Variance Table
##
## Response: x8
## Df Sum Sq Mean Sq F value Pr(>F)
## x4 1 1.813 1.8135 0.6567 0.4211
## Residuals 58 160.176 2.7616
anova(lm(x9 ~ x4, data = train))
## Analysis of Variance Table
##
## Response: x9
## Df Sum Sq Mean Sq F value Pr(>F)
## x4 1 0.449 0.44856 0.3502 0.5563
## Residuals 58 74.281 1.28071
anova(lm(x10 ~ x4, data = train))
## Analysis of Variance Table
##
## Response: x10
## Df Sum Sq Mean Sq F value Pr(>F)
## x4 1 1.302 1.3025 0.9535 0.3329
## Residuals 58 79.224 1.3659
anova(lm(x11 ~ x4, data = train)) #significant
## Analysis of Variance Table
##
## Response: x11
## Df Sum Sq Mean Sq F value Pr(>F)
## x4 1 29.722 29.7217 29.242 1.261e-06 ***
## Residuals 58 58.951 1.0164
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lm(x12 ~ x4, data = train)) #significant
## Analysis of Variance Table
##
## Response: x12
## Df Sum Sq Mean Sq F value Pr(>F)
## x4 1 5.711 5.7105 5.8888 0.01837 *
## Residuals 58 56.243 0.9697
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lm(x13 ~ x4, data = train)) #significant
## Analysis of Variance Table
##
## Response: x13
## Df Sum Sq Mean Sq F value Pr(>F)
## x4 1 53.034 53.034 34.878 1.948e-07 ***
## Residuals 58 88.192 1.521
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lm(x14 ~ x4, data = train))
## Analysis of Variance Table
##
## Response: x14
## Df Sum Sq Mean Sq F value Pr(>F)
## x4 1 0.539 0.53882 0.7957 0.3761
## Residuals 58 39.277 0.67719
anova(lm(x15 ~ x4, data = train))
## Analysis of Variance Table
##
## Response: x15
## Df Sum Sq Mean Sq F value Pr(>F)
## x4 1 0.618 0.61754 0.3183 0.5748
## Residuals 58 112.526 1.94010
anova(lm(x16 ~ x4, data = train))
## Analysis of Variance Table
##
## Response: x16
## Df Sum Sq Mean Sq F value Pr(>F)
## x4 1 0.005 0.00549 0.0055 0.9414
## Residuals 58 58.436 1.00752
anova(lm(x17 ~ x4, data = train)) #significant
## Analysis of Variance Table
##
## Response: x17
## Df Sum Sq Mean Sq F value Pr(>F)
## x4 1 26.375 26.3748 28.265 1.766e-06 ***
## Residuals 58 54.122 0.9331
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(lm(x18 ~ x4, data = train))
## Analysis of Variance Table
##
## Response: x18
## Df Sum Sq Mean Sq F value Pr(>F)
## x4 1 0.0001 0.0001 2e-04 0.988
## Residuals 58 24.4297 0.4212
#RESULT: the group means differ significantly for x6, x11, x12, x13 and x17, so these are the most useful variables - they are the ones that will maximise the separation between the groups.
#For each of them we reject the null hypothesis of equal group means: at least one group mean differs from the other.
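#The thirteen ANOVAs above can also be run in one pass; a minimal sketch
#that collects the p-values for every perception variable:
pvals <- sapply(paste0("x", 6:18), function(v) {
  anova(lm(as.formula(paste(v, "~ x4")), data = train))$"Pr(>F)"[1]
})
sort(pvals) #x13, x11, x17, x6 and x12 fall below 0.05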
#STEPWISE ESTIMATION METHOD - add variables one at a time, starting with the most significant.
#The process keeps entering variables into the discriminant function as long as they provide statistically significant additional discrimination between the groups beyond the differences already accounted for by the variables in the function.
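#The stepwise search can also be automated; a sketch using greedy.wilks()
#from the klaR package (an assumption: klaR is installed), which enters
#variables by Wilks' lambda until no further addition is significant:
library(klaR)
greedy.wilks(x4 ~ ., data = train, niveau = 0.05)
#Below, the steps are instead carried out manually so the effect of each
#added variable on classification accuracy stays visible.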
#LINEAR DISCRIMINANT ANALYSIS
library(MASS)
## Warning: package 'MASS' was built under R version 3.4.3
library(caret)
## Warning: package 'caret' was built under R version 3.4.4
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.4.2
##
## Attaching package: 'ggplot2'
##
## The following objects are masked from 'package:psych':
##
## %+%, alpha
#Stepwise estimation, step 1: start with x13, the variable with the smallest ANOVA p-value.
LDAmodel = lda(x4 ~ x13,data = train)
LDAmodel
## Call:
## lda(x4 ~ x13, data = train)
##
## Prior probabilities of groups:
## 0 1
## 0.3666667 0.6333333
##
## Group means:
## x13
## 0 5.722727
## 1 7.673684
##
## Coefficients of linear discriminants:
## LD1
## x13 0.8109585
ldapred <- predict(LDAmodel)
ldapred #returns $class (predicted group), $posterior (group membership probabilities) and $x (the discriminant z scores)
## $class
## [1] 1 1 0 0 1 1 1 0 1 0 0 1 1 0 1 0 0 0 0 1 1 0 1 1 0 1 1 0 0 1 1 1 1 1 1
## [36] 0 1 0 1 1 1 1 1 0 0 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1
## Levels: 0 1
##
## $posterior
## 0 1
## 12 0.077742190 0.92225781
## 62 0.366132225 0.63386777
## 60 0.753826239 0.24617376
## 61 0.853291201 0.14670880
## 83 0.491094874 0.50890513
## 97 0.233192268 0.76680773
## 1 0.336897415 0.66310258
## 22 0.906689954 0.09331005
## 99 0.110218562 0.88978144
## 47 0.959767291 0.04023271
## 63 0.647005584 0.35299442
## 49 0.336897415 0.66310258
## 25 0.098250544 0.90174946
## 81 0.853291201 0.14670880
## 26 0.256914569 0.74308543
## 72 0.836490740 0.16350926
## 88 0.617180351 0.38281965
## 23 0.964439647 0.03556035
## 16 0.853291201 0.14670880
## 19 0.025877197 0.97412280
## 86 0.153996404 0.84600360
## 24 0.523153709 0.47684629
## 13 0.153996404 0.84600360
## 4 0.037569870 0.96243013
## 17 0.523153709 0.47684629
## 95 0.190466230 0.80953377
## 39 0.029315934 0.97068407
## 67 0.523153709 0.47684629
## 98 0.703185523 0.29681448
## 77 0.190466230 0.80953377
## 32 0.396387434 0.60361257
## 87 0.077742190 0.92225781
## 21 0.061224008 0.93877599
## 34 0.042494628 0.95750537
## 100 0.012152330 0.98784767
## 50 0.882597597 0.11740240
## 78 0.054251662 0.94574834
## 76 0.853291201 0.14670880
## 92 0.009427815 0.99057218
## 65 0.233192268 0.76680773
## 73 0.042494628 0.95750537
## 74 0.336897415 0.66310258
## 69 0.153996404 0.84600360
## 36 0.703185523 0.29681448
## 58 0.916993249 0.08300675
## 28 0.029315934 0.97068407
## 37 0.336897415 0.66310258
## 80 0.123444848 0.87655515
## 64 0.523153709 0.47684629
## 40 0.077742190 0.92225781
## 71 0.061224008 0.93877599
## 82 0.336897415 0.66310258
## 35 0.061224008 0.93877599
## 79 0.895253839 0.10474616
## 8 0.308859507 0.69114049
## 96 0.366132225 0.63386777
## 93 0.256914569 0.74308543
## 33 0.022832332 0.97716767
## 46 0.061224008 0.93877599
## 48 0.077742190 0.92225781
##
## $x
## LD1
## 12 1.00694009
## 62 -0.20949760
## 60 -1.26374361
## 61 -1.66922284
## 83 -0.53388099
## 97 0.19598163
## 1 -0.12840176
## 22 -1.99360623
## 99 0.76365255
## 47 -2.56127715
## 63 -0.93936022
## 49 -0.12840176
## 25 0.84474840
## 81 -1.66922284
## 26 0.11488578
## 72 -1.58812699
## 88 -0.85826438
## 23 -2.64237300
## 16 -1.66922284
## 19 1.73680271
## 86 0.52036502
## 24 -0.61497684
## 13 0.52036502
## 4 1.49351517
## 17 -0.61497684
## 95 0.35817332
## 39 1.65570687
## 67 -0.61497684
## 98 -1.10155192
## 77 0.35817332
## 32 -0.29059345
## 87 1.00694009
## 21 1.16913179
## 34 1.41241933
## 100 2.22337779
## 50 -1.83141453
## 78 1.25022763
## 76 -1.66922284
## 92 2.38556949
## 65 0.19598163
## 73 1.41241933
## 74 -0.12840176
## 69 0.52036502
## 36 -1.10155192
## 58 -2.07470207
## 28 1.65570687
## 37 -0.12840176
## 80 0.68255671
## 64 -0.61497684
## 40 1.00694009
## 71 1.16913179
## 82 -0.12840176
## 35 1.16913179
## 79 -1.91251038
## 8 -0.04730591
## 96 -0.20949760
## 93 0.11488578
## 33 1.81789856
## 46 1.16913179
## 48 1.00694009
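#Where the z scores come from (a sketch based on an assumption about the
#MASS internals, consistent with the output above): predict() centres x13
#at the prior-weighted grand mean and multiplies by the LD1 coefficient.
grand_mean <- sum(LDAmodel$prior * LDAmodel$means) #about 6.96 here
head((train$x13 - grand_mean) * LDAmodel$scaling[1, 1]) #matches ldapred$x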
ldapred$class #classification of your trained model
## [1] 1 1 0 0 1 1 1 0 1 0 0 1 1 0 1 0 0 0 0 1 1 0 1 1 0 1 1 0 0 1 1 1 1 1 1
## [36] 0 1 0 1 1 1 1 1 0 0 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1
## Levels: 0 1
confusionMatrix(ldapred$class,train$x4) #classification of your trained model vs classification of your original data
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 14 6
## 1 8 32
##
## Accuracy : 0.7667
## 95% CI : (0.6396, 0.8662)
## No Information Rate : 0.6333
## P-Value [Acc > NIR] : 0.01977
##
## Kappa : 0.4878
## Mcnemar's Test P-Value : 0.78927
##
## Sensitivity : 0.6364
## Specificity : 0.8421
## Pos Pred Value : 0.7000
## Neg Pred Value : 0.8000
## Prevalence : 0.3667
## Detection Rate : 0.2333
## Detection Prevalence : 0.3333
## Balanced Accuracy : 0.7392
##
## 'Positive' Class : 0
##
#RESULT: classification using only x13 as the independent variable gives 76.67% accuracy, with 14 misclassifications
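#Version note (an assumption about newer caret releases, not an issue in
#the run above): confusionMatrix() may require both arguments to be
#factors with identical levels, in which case coerce the reference first:
#  confusionMatrix(ldapred$class, factor(train$x4, levels = c(0, 1)))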
#add x17
LDAmodel = lda(x4 ~ x13+x17,data = train)
ldapred <- predict(LDAmodel)
ldapred #gives you z scores
## $class
## [1] 1 1 0 0 0 1 1 0 1 0 0 0 1 0 1 0 0 0 0 1 1 1 1 1 1 0 1 1 0 1 0 1 1 1 1
## [36] 0 1 0 1 1 1 0 1 0 0 1 0 1 0 1 1 0 1 0 1 0 0 1 1 1
## Levels: 0 1
##
## $posterior
## 0 1
## 12 0.054113008 0.94588699
## 62 0.209307519 0.79069248
## 60 0.555590626 0.44440937
## 61 0.878450552 0.12154945
## 83 0.820401853 0.17959815
## 97 0.069000444 0.93099956
## 1 0.173698706 0.82630129
## 22 0.970740607 0.02925939
## 99 0.073741737 0.92625826
## 47 0.955602686 0.04439731
## 63 0.773328333 0.22667167
## 49 0.619349223 0.38065078
## 25 0.042198717 0.95780128
## 81 0.817026006 0.18297399
## 26 0.024226817 0.97577318
## 72 0.968707864 0.03129214
## 88 0.706052946 0.29394705
## 23 0.980190501 0.01980950
## 16 0.878450552 0.12154945
## 19 0.005615471 0.99438453
## 86 0.057215697 0.94278430
## 24 0.485987854 0.51401215
## 13 0.152050522 0.84794948
## 4 0.071827239 0.92817276
## 17 0.097715884 0.90228412
## 95 0.516647339 0.48335266
## 39 0.002129270 0.99787073
## 67 0.097715884 0.90228412
## 98 0.917627044 0.08237296
## 77 0.044650522 0.95534948
## 32 0.642611890 0.35738811
## 87 0.060615706 0.93938429
## 21 0.043882799 0.95611720
## 34 0.004242941 0.99575706
## 100 0.005968580 0.99403142
## 50 0.862595219 0.13740478
## 78 0.062385775 0.93761423
## 76 0.817026006 0.18297399
## 92 0.025325818 0.97467418
## 65 0.451505917 0.54849408
## 73 0.014001813 0.98599819
## 74 0.619349223 0.38065078
## 69 0.057215697 0.94278430
## 36 0.917627044 0.08237296
## 58 0.926123472 0.07387653
## 28 0.016247736 0.98375226
## 37 0.590590991 0.40940901
## 80 0.436930902 0.56306910
## 64 0.818890617 0.18110938
## 40 0.054113008 0.94588699
## 71 0.010709119 0.98929088
## 82 0.531372046 0.46862795
## 35 0.086345900 0.91365410
## 79 0.887708678 0.11229132
## 8 0.042520144 0.95747986
## 96 0.588116843 0.41188316
## 93 0.753867331 0.24613267
## 33 0.029855243 0.97014476
## 46 0.015295859 0.98470414
## 48 0.003172310 0.99682769
##
## $x
## LD1
## 12 0.9263975
## 62 0.1410719
## 60 -0.6547351
## 61 -1.5541775
## 83 -1.3189987
## 97 0.7936754
## 1 0.2592507
## 22 -2.3354411
## 99 0.7569905
## 47 -2.1136270
## 63 -1.1693757
## 49 -0.7898147
## 25 1.0602984
## 81 -1.3073386
## 26 1.3543037
## 72 -2.2999349
## 88 -0.9894873
## 23 -2.5403544
## 16 -1.5541775
## 19 2.1134258
## 86 0.8961320
## 24 -0.5115316
## 13 0.3407445
## 4 0.7715339
## 17 0.5992435
## 95 -0.5744200
## 39 2.6123443
## 67 0.5992435
## 98 -1.7759916
## 77 1.0300329
## 32 -0.8410430
## 87 0.8646878
## 21 1.0393356
## 34 2.2578080
## 100 2.0819815
## 50 -1.4819864
## 78 0.8489657
## 76 -1.3073386
## 92 1.3309834
## 65 -0.4405191
## 73 1.6407108
## 74 -0.7898147
## 69 0.8961320
## 36 -1.7759916
## 58 -1.8365226
## 28 1.5632789
## 37 -0.7281050
## 80 -0.4102536
## 64 -1.3137580
## 40 0.9263975
## 71 1.7798523
## 82 -0.6046855
## 35 0.6690772
## 79 -1.6001651
## 8 1.0562364
## 96 -0.7228643
## 93 -1.1140854
## 33 1.2442489
## 46 1.5947231
## 48 2.4074310
ldapred$class #classification of your trained model
## [1] 1 1 0 0 0 1 1 0 1 0 0 0 1 0 1 0 0 0 0 1 1 1 1 1 1 0 1 1 0 1 0 1 1 1 1
## [36] 0 1 0 1 1 1 0 1 0 0 1 0 1 0 1 1 0 1 0 1 0 0 1 1 1
## Levels: 0 1
confusionMatrix(ldapred$class,train$x4) #classification of your trained model vs classification of your original data
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 20 6
## 1 2 32
##
## Accuracy : 0.8667
## 95% CI : (0.7541, 0.9406)
## No Information Rate : 0.6333
## P-Value [Acc > NIR] : 5.39e-05
##
## Kappa : 0.7235
## Mcnemar's Test P-Value : 0.2888
##
## Sensitivity : 0.9091
## Specificity : 0.8421
## Pos Pred Value : 0.7692
## Neg Pred Value : 0.9412
## Prevalence : 0.3667
## Detection Rate : 0.3333
## Detection Prevalence : 0.4333
## Balanced Accuracy : 0.8756
##
## 'Positive' Class : 0
##
#RESULT: after step 2, accuracy has improved to 86.67%, with 8 misclassifications
#add x11
LDAmodel = lda(x4 ~ x13+x17+x11,data = train)
ldapred <- predict(LDAmodel)
ldapred #gives you z scores
## $class
## [1] 1 1 0 0 0 1 1 0 1 0 1 0 1 0 1 0 0 0 0 1 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1
## [36] 0 1 0 1 1 1 0 1 0 0 1 0 1 0 1 1 0 1 0 1 0 0 1 1 1
## Levels: 0 1
##
## $posterior
## 0 1
## 12 0.014874561 0.985125439
## 62 0.097289604 0.902710396
## 60 0.508535722 0.491464278
## 61 0.965119653 0.034880347
## 83 0.739494216 0.260505784
## 97 0.027118895 0.972881105
## 1 0.095711757 0.904288243
## 22 0.993794861 0.006205139
## 99 0.064410843 0.935589157
## 47 0.888616437 0.111383563
## 63 0.489497558 0.510502442
## 49 0.804408766 0.195591234
## 25 0.039615814 0.960384186
## 81 0.770043119 0.229956881
## 26 0.006771730 0.993228270
## 72 0.963228636 0.036771364
## 88 0.867666969 0.132333031
## 23 0.988902438 0.011097562
## 16 0.965119653 0.034880347
## 19 0.010284364 0.989715636
## 86 0.018011983 0.981988017
## 24 0.746807045 0.253192955
## 13 0.363742038 0.636257962
## 4 0.039851575 0.960148425
## 17 0.088846057 0.911153943
## 95 0.633550675 0.366449325
## 39 0.002157952 0.997842048
## 67 0.088846057 0.911153943
## 98 0.902718002 0.097281998
## 77 0.022007065 0.977992935
## 32 0.450098578 0.549901422
## 87 0.021439514 0.978560486
## 21 0.017662154 0.982337846
## 34 0.002972800 0.997027200
## 100 0.002981323 0.997018677
## 50 0.915656262 0.084343738
## 78 0.150526435 0.849473565
## 76 0.770043119 0.229956881
## 92 0.004875571 0.995124429
## 65 0.230558474 0.769441526
## 73 0.037362391 0.962637609
## 74 0.804408766 0.195591234
## 69 0.018011983 0.981988017
## 36 0.902718002 0.097281998
## 58 0.937374016 0.062625984
## 28 0.011732499 0.988267501
## 37 0.603271576 0.396728424
## 80 0.277889909 0.722110091
## 64 0.556740313 0.443259687
## 40 0.014874561 0.985125439
## 71 0.012203586 0.987796414
## 82 0.715035468 0.284964532
## 35 0.082595808 0.917404192
## 79 0.957486497 0.042513503
## 8 0.008512652 0.991487348
## 96 0.599726525 0.400273475
## 93 0.961978532 0.038021468
## 33 0.026335692 0.973664308
## 46 0.043147488 0.956852512
## 48 0.005931666 0.994068334
##
## $x
## LD1
## 12 1.43236673
## 62 0.50762397
## 60 -0.55660086
## 61 -2.10278112
## 83 -1.03143687
## 97 1.14390419
## 1 0.51613896
## 22 -2.92891330
## 99 0.71849922
## 47 -1.51763536
## 63 -0.52076584
## 49 -1.20587322
## 25 0.95949919
## 81 -1.10917130
## 26 1.80646298
## 72 -2.07701736
## 88 -1.42532154
## 23 -2.65306168
## 16 -2.10278112
## 19 1.60818509
## 86 1.34081748
## 24 -1.04946377
## 13 -0.27744501
## 4 0.95659188
## 17 0.55472059
## 95 -0.79812921
## 39 2.34671680
## 67 0.55472059
## 98 -1.58873422
## 77 1.24464394
## 32 -0.44630466
## 87 1.25721033
## 21 1.35021323
## 34 2.19560717
## 100 2.19425614
## 50 -1.66257797
## 78 0.27367177
## 76 -1.10917130
## 92 1.96193186
## 65 0.02650478
## 73 0.98815671
## 74 -1.20587322
## 69 1.34081748
## 36 -1.58873422
## 58 -1.81368760
## 28 1.54551215
## 37 -0.73773260
## 80 -0.09122041
## 64 -0.64778416
## 40 1.43236673
## 71 1.52676520
## 82 -0.97338823
## 35 0.59225904
## 79 -2.00593206
## 8 1.69798638
## 96 -0.73077389
## 93 -2.06067647
## 33 1.15807138
## 46 0.91758622
## 48 1.86918044
ldapred$class #classification of your trained model
## [1] 1 1 0 0 0 1 1 0 1 0 1 0 1 0 1 0 0 0 0 1 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1
## [36] 0 1 0 1 1 1 0 1 0 0 1 0 1 0 1 1 0 1 0 1 0 0 1 1 1
## Levels: 0 1
confusionMatrix(ldapred$class,train$x4) #classification of your trained model vs classification of your original data
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 19 6
## 1 3 32
##
## Accuracy : 0.85
## 95% CI : (0.7343, 0.929)
## No Information Rate : 0.6333
## P-Value [Acc > NIR] : 0.0001892
##
## Kappa : 0.686
## Mcnemar's Test P-Value : 0.5049851
##
## Sensitivity : 0.8636
## Specificity : 0.8421
## Pos Pred Value : 0.7600
## Neg Pred Value : 0.9143
## Prevalence : 0.3667
## Detection Rate : 0.3167
## Detection Prevalence : 0.4167
## Balanced Accuracy : 0.8529
##
## 'Positive' Class : 0
##
#RESULT: after step 3, accuracy has dropped to 85% with 9 misclassifications, so remove x11 for now
#check with x6
LDAmodel = lda(x4 ~ x13+x17+x6,data = train)
ldapred <- predict(LDAmodel)
ldapred #gives you z scores
## $class
## [1] 1 1 0 0 0 1 1 0 1 0 0 0 1 0 1 0 0 0 0 1 1 0 1 1 1 0 1 1 0 1 0 1 1 1 1
## [36] 0 1 0 1 1 1 0 1 0 0 1 0 1 0 1 1 0 1 0 1 0 0 1 1 1
## Levels: 0 1
##
## $posterior
## 0 1
## 12 0.023468233 0.97653177
## 62 0.267037163 0.73296284
## 60 0.777384011 0.22261599
## 61 0.888719217 0.11128078
## 83 0.831795723 0.16820428
## 97 0.061865578 0.93813442
## 1 0.239938320 0.76006168
## 22 0.981020404 0.01897960
## 99 0.080927263 0.91907274
## 47 0.978428651 0.02157135
## 63 0.812960396 0.18703960
## 49 0.811574412 0.18842559
## 25 0.017767018 0.98223298
## 81 0.914348132 0.08565187
## 26 0.013883267 0.98611673
## 72 0.896737086 0.10326291
## 88 0.625021746 0.37497825
## 23 0.976188929 0.02381107
## 16 0.888719217 0.11128078
## 19 0.002336802 0.99766320
## 86 0.055211863 0.94478814
## 24 0.654829451 0.34517055
## 13 0.329307358 0.67069264
## 4 0.035710932 0.96428907
## 17 0.032930360 0.96706964
## 95 0.674841280 0.32515872
## 39 0.004653972 0.99534603
## 67 0.032930360 0.96706964
## 98 0.920201419 0.07979858
## 77 0.047175755 0.95282425
## 32 0.618944535 0.38105546
## 87 0.013730109 0.98626989
## 21 0.011475802 0.98852420
## 34 0.007567779 0.99243222
## 100 0.009465358 0.99053464
## 50 0.815226549 0.18477345
## 78 0.161377439 0.83862256
## 76 0.914348132 0.08565187
## 92 0.020852359 0.97914764
## 65 0.251127161 0.74887284
## 73 0.008888524 0.99111148
## 74 0.811574412 0.18842559
## 69 0.055211863 0.94478814
## 36 0.920201419 0.07979858
## 58 0.871497783 0.12850222
## 28 0.013509939 0.98649006
## 37 0.694804134 0.30519587
## 80 0.298010585 0.70198941
## 64 0.742368000 0.25763200
## 40 0.023468233 0.97653177
## 71 0.032051054 0.96794895
## 82 0.607106309 0.39289369
## 35 0.050289281 0.94971072
## 79 0.925209413 0.07479059
## 8 0.020402550 0.97959745
## 96 0.640716516 0.35928348
## 93 0.831096137 0.16890386
## 33 0.019296335 0.98070367
## 46 0.006782470 0.99321753
## 48 0.001524178 0.99847582
##
## $x
## LD1
## 12 1.26779166
## 62 -0.05041033
## 60 -1.14631775
## 61 -1.54742365
## 83 -1.31501560
## 97 0.77834586
## 1 0.01907713
## 22 -2.45292648
## 99 0.63816181
## 47 -2.38957928
## 63 -1.25244479
## 49 -1.24803773
## 25 1.40555634
## 81 -1.68813174
## 26 1.52706836
## 72 -1.58803647
## 88 -0.78772107
## 23 -2.34057006
## 16 -1.54742365
## 19 2.39670907
## 86 0.83694453
## 24 -0.85047230
## 13 -0.19509057
## 4 1.05812015
## 17 1.09882102
## 95 -0.89402745
## 39 2.06153404
## 67 1.09882102
## 98 -1.72554772
## 77 0.91732077
## 32 -0.77518822
## 87 1.53252244
## 21 1.62059171
## 34 1.82437746
## 100 1.71496445
## 50 -1.25970507
## 78 0.25908799
## 76 -1.68813174
## 92 1.32639140
## 65 -0.01021307
## 73 1.74573441
## 74 -1.24803773
## 69 0.83694453
## 36 -1.72554772
## 58 -1.46816742
## 28 1.54046889
## 37 -0.93888412
## 80 -0.12455611
## 64 -1.05313803
## 40 1.26779166
## 71 1.11238476
## 82 -0.75099019
## 35 0.88474461
## 79 -1.75960581
## 8 1.33718782
## 96 -0.82047765
## 93 -1.31259515
## 33 1.36476418
## 46 1.87788288
## 48 2.60430376
ldapred$class #classification of your trained model
## [1] 1 1 0 0 0 1 1 0 1 0 0 0 1 0 1 0 0 0 0 1 1 0 1 1 1 0 1 1 0 1 0 1 1 1 1
## [36] 0 1 0 1 1 1 0 1 0 0 1 0 1 0 1 1 0 1 0 1 0 0 1 1 1
## Levels: 0 1
confusionMatrix(ldapred$class,train$x4) #classification of your trained model vs classification of your original data
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 20 7
## 1 2 31
##
## Accuracy : 0.85
## 95% CI : (0.7343, 0.929)
## No Information Rate : 0.6333
## P-Value [Acc > NIR] : 0.0001892
##
## Kappa : 0.6918
## Mcnemar's Test P-Value : 0.1824224
##
## Sensitivity : 0.9091
## Specificity : 0.8158
## Pos Pred Value : 0.7407
## Neg Pred Value : 0.9394
## Prevalence : 0.3667
## Detection Rate : 0.3333
## Detection Prevalence : 0.4500
## Balanced Accuracy : 0.8624
##
## 'Positive' Class : 0
##
#RESULT: same as before - adding x6 to x13 and x17 gives 85% accuracy with 9 misclassifications
#add both x6 and x11 alongside x13 and x17
LDAmodelb = lda(x4 ~ x13+x17+x6+x11,data = train)
ldapredb <- predict(LDAmodelb)
ldapredb #gives you z scores
## $class
## [1] 1 1 0 0 0 1 1 0 1 0 0 0 1 0 1 0 0 0 0 1 1 0 0 1 1 0 1 1 0 1 1 1 1 1 1
## [36] 0 1 0 1 1 1 0 1 0 0 1 0 1 0 1 1 0 1 0 1 0 0 1 1 1
## Levels: 0 1
##
## $posterior
## 0 1
## 12 0.009561173 0.990438827
## 62 0.134194786 0.865805214
## 60 0.693499592 0.306500408
## 61 0.960903462 0.039096538
## 83 0.763090129 0.236909871
## 97 0.028578764 0.971421236
## 1 0.136025691 0.863974309
## 22 0.994387333 0.005612667
## 99 0.070370044 0.929629956
## 47 0.940850736 0.059149264
## 63 0.578719292 0.421280708
## 49 0.880518374 0.119481626
## 25 0.021179467 0.978820533
## 81 0.868825908 0.131174092
## 26 0.005375060 0.994624940
## 72 0.913311007 0.086688993
## 88 0.812789800 0.187210200
## 23 0.986176409 0.013823591
## 16 0.960903462 0.039096538
## 19 0.004961347 0.995038653
## 86 0.020729559 0.979270441
## 24 0.807070207 0.192929793
## 13 0.503943220 0.496056780
## 4 0.025710964 0.974289036
## 17 0.040558693 0.959441307
## 95 0.724454340 0.275545660
## 39 0.003828315 0.996171685
## 67 0.040558693 0.959441307
## 98 0.907150711 0.092849289
## 77 0.025401040 0.974598960
## 32 0.459464080 0.540535920
## 87 0.008209808 0.991790192
## 21 0.007425842 0.992574158
## 34 0.004790737 0.995209263
## 100 0.004627728 0.995372272
## 50 0.885653373 0.114346627
## 78 0.251693657 0.748306343
## 76 0.868825908 0.131174092
## 92 0.005344181 0.994655819
## 65 0.151738612 0.848261388
## 73 0.023414044 0.976585956
## 74 0.880518374 0.119481626
## 69 0.020729559 0.979270441
## 36 0.907150711 0.092849289
## 58 0.902806180 0.097193820
## 28 0.010727914 0.989272086
## 37 0.678544798 0.321455202
## 80 0.214465971 0.785534029
## 64 0.519934195 0.480065805
## 40 0.009561173 0.990438827
## 71 0.026844164 0.973155836
## 82 0.737690493 0.262309507
## 35 0.055902504 0.944097496
## 79 0.964225093 0.035774907
## 8 0.006184717 0.993815283
## 96 0.636661182 0.363338818
## 93 0.963650881 0.036349119
## 33 0.019443898 0.980556102
## 46 0.020719642 0.979280358
## 48 0.003167628 0.996832372
##
## $x
## LD1
## 12 1.58897141
## 62 0.31454343
## 60 -0.91619384
## 61 -2.01123242
## 83 -1.07832343
## 97 1.07740368
## 1 0.30735047
## 22 -2.91802784
## 99 0.64354108
## 47 -1.81147930
## 63 -0.68710861
## 49 -1.45827877
## 25 1.21844104
## 81 -1.40928118
## 26 1.85530834
## 72 -1.62235326
## 88 -1.21537795
## 23 -2.50043268
## 16 -2.01123242
## 19 1.89226782
## 86 1.22850907
## 24 -1.19832044
## 13 -0.54858465
## 4 1.12730263
## 17 0.91099070
## 95 -0.98511672
## 39 2.01180697
## 67 0.91099070
## 98 -1.58773038
## 77 1.13301604
## 32 -0.46674350
## 87 1.65955179
## 21 1.70598901
## 34 1.90841096
## 100 1.92437858
## 50 -1.48111461
## 78 -0.04113413
## 76 -1.40928118
## 92 1.85796757
## 65 0.24874028
## 73 1.17134469
## 74 -1.45827877
## 69 1.22850907
## 36 -1.58773038
## 58 -1.56453316
## 28 1.53557284
## 37 -0.88431592
## 80 0.05463514
## 64 -0.57796823
## 40 1.58897141
## 71 1.10696798
## 82 -1.01602808
## 35 0.75629083
## 79 -2.05357671
## 8 1.79052083
## 96 -0.79884029
## 93 -2.04599326
## 33 1.25850482
## 46 1.22873339
## 48 2.09907955
ldapredb$class #classification of your trained model
## [1] 1 1 0 0 0 1 1 0 1 0 0 0 1 0 1 0 0 0 0 1 1 0 0 1 1 0 1 1 0 1 1 1 1 1 1
## [36] 0 1 0 1 1 1 0 1 0 0 1 0 1 0 1 1 0 1 0 1 0 0 1 1 1
## Levels: 0 1
confusionMatrix(ldapredb$class,train$x4) #classification of your trained model vs classification of your original data
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 21 6
## 1 1 32
##
## Accuracy : 0.8833
## 95% CI : (0.7743, 0.9518)
## No Information Rate : 0.6333
## P-Value [Acc > NIR] : 1.343e-05
##
## Kappa : 0.7603
## Mcnemar's Test P-Value : 0.1306
##
## Sensitivity : 0.9545
## Specificity : 0.8421
## Pos Pred Value : 0.7778
## Neg Pred Value : 0.9697
## Prevalence : 0.3667
## Detection Rate : 0.3500
## Detection Prevalence : 0.4500
## Balanced Accuracy : 0.8983
##
## 'Positive' Class : 0
##
#RESULT: improvement - accuracy = 88.33%, misclassifications = 7
#Check LDA for all significant variables
LDAmodel = lda(x4 ~ x13+x17+x11+x6+x12,data = train)
ldapred <- predict(LDAmodel)
ldapred #gives you z scores
## $class
## [1] 1 1 0 0 0 1 1 0 1 0 1 0 1 0 1 0 0 0 0 1 1 0 1 1 1 0 1 1 0 1 0 1 1 1 1
## [36] 0 1 0 1 1 1 0 1 0 0 1 0 1 0 1 1 0 1 0 1 1 0 1 1 1
## Levels: 0 1
##
## $posterior
## 0 1
## 12 0.001297257 0.998702743
## 62 0.044557768 0.955442232
## 60 0.919220474 0.080779526
## 61 0.993061710 0.006938290
## 83 0.876495380 0.123504620
## 97 0.025759045 0.974240955
## 1 0.045326003 0.954673997
## 22 0.947389717 0.052610283
## 99 0.008897638 0.991102362
## 47 0.947890789 0.052109211
## 63 0.499969972 0.500030028
## 49 0.672096011 0.327903989
## 25 0.015230677 0.984769323
## 81 0.887157008 0.112842992
## 26 0.002533657 0.997466343
## 72 0.927194438 0.072805562
## 88 0.927132103 0.072867897
## 23 0.991000082 0.008999918
## 16 0.993061710 0.006938290
## 19 0.003668916 0.996331084
## 86 0.023332394 0.976667606
## 24 0.925118197 0.074881803
## 13 0.272067035 0.727932965
## 4 0.032265620 0.967734380
## 17 0.034375281 0.965624719
## 95 0.670019155 0.329980845
## 39 0.005226174 0.994773826
## 67 0.034375281 0.965624719
## 98 0.986584603 0.013415397
## 77 0.021398734 0.978601266
## 32 0.672114967 0.327885033
## 87 0.005310978 0.994689022
## 21 0.004729004 0.995270996
## 34 0.007293596 0.992706404
## 100 0.005245026 0.994754974
## 50 0.798313853 0.201686147
## 78 0.405676657 0.594323343
## 76 0.887157008 0.112842992
## 92 0.004779397 0.995220603
## 65 0.115124074 0.884875926
## 73 0.013864336 0.986135664
## 74 0.672096011 0.327903989
## 69 0.023332394 0.976667606
## 36 0.986584603 0.013415397
## 58 0.853691224 0.146308776
## 28 0.023377983 0.976622017
## 37 0.789427302 0.210572698
## 80 0.069026177 0.930973823
## 64 0.749811226 0.250188774
## 40 0.001297257 0.998702743
## 71 0.004439686 0.995560314
## 82 0.858599803 0.141400197
## 35 0.008927505 0.991072495
## 79 0.943329900 0.056670100
## 8 0.002972001 0.997027999
## 96 0.467664724 0.532335276
## 93 0.987276676 0.012723324
## 33 0.021221033 0.978778967
## 46 0.020072991 0.979927009
## 48 0.001667263 0.998332737
##
## $x
## LD1
## 12 2.2535832
## 62 0.7448403
## 60 -1.5713479
## 61 -2.6381544
## 83 -1.3724122
## 97 0.9839447
## 1 0.7372988
## 22 -1.7647417
## 99 1.4390594
## 47 -1.7689966
## 63 -0.5466807
## 49 -0.8491190
## 25 1.2098771
## 81 -1.4155456
## 26 1.9710112
## 72 -1.6187776
## 88 -1.6183886
## 23 -2.5276628
## 16 -2.6381544
## 19 1.8145374
## 86 1.0266816
## 24 -1.6059855
## 13 -0.1320648
## 4 0.8862285
## 17 0.8586232
## 95 -0.8451548
## 39 1.6648152
## 67 0.8586232
## 98 -2.3575874
## 77 1.0639655
## 32 -0.8491553
## 87 1.6579971
## 21 1.7071450
## 34 1.5234986
## 100 1.6632901
## 50 -1.1264068
## 78 -0.3858353
## 76 -1.4155456
## 92 1.7026575
## 65 0.3125658
## 73 1.2500638
## 74 -0.8491190
## 69 1.0266816
## 36 -2.3575874
## 58 -1.2899127
## 28 1.0258395
## 37 -1.1035229
## 80 0.5494890
## 64 -1.0091970
## 40 2.2535832
## 71 1.7338671
## 82 -1.3067067
## 35 1.4376347
## 79 -1.7316118
## 8 1.9035919
## 96 -0.4921585
## 93 -2.3801997
## 33 1.0675555
## 46 1.0914834
## 48 2.1476995
ldapred$class #classification of your trained model
## [1] 1 1 0 0 0 1 1 0 1 0 1 0 1 0 1 0 0 0 0 1 1 0 1 1 1 0 1 1 0 1 0 1 1 1 1
## [36] 0 1 0 1 1 1 0 1 0 0 1 0 1 0 1 1 0 1 0 1 1 0 1 1 1
## Levels: 0 1
confusionMatrix(ldapred$class,train$x4) #classification of your trained model vs classification of your original data
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 18 7
## 1 4 31
##
## Accuracy : 0.8167
## 95% CI : (0.6956, 0.9048)
## No Information Rate : 0.6333
## P-Value [Acc > NIR] : 0.001641
##
## Kappa : 0.6163
## Mcnemar's Test P-Value : 0.546494
##
## Sensitivity : 0.8182
## Specificity : 0.8158
## Pos Pred Value : 0.7200
## Neg Pred Value : 0.8857
## Prevalence : 0.3667
## Detection Rate : 0.3000
## Detection Prevalence : 0.4167
## Balanced Accuracy : 0.8170
##
## 'Positive' Class : 0
##
#RESULT: worse - accuracy falls to 81.67% with 11 misclassifications.
#Check LDA with all significant variables + x7. Adding x7 to the significant set improves the model and boosts the training accuracy.
#A likely reason is that x7 scores relatively higher for group 1 (customers outside North America), so it helps classify that group well; adding it therefore improves the model.
#x7 also has a comparatively high discriminant loading, so it is worth including to see whether it improves the model (a loading check is sketched below).
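#A sketch of that loading check (not part of the original run): correlate
#each candidate variable with the LD1 scores of the previous
#four-variable model (ldapredb); large absolute correlations indicate
#high discriminant loadings.
cor(train[, c("x6", "x7", "x11", "x12", "x13", "x17")], ldapredb$x[, 1])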
LDAmodel1 = lda(x4 ~ x13+x17+x11+x12+x6+x7,data = train)
ldapred <- predict(LDAmodel1)
ldapred #gives you z scores
## $class
## [1] 1 1 0 0 0 1 1 0 1 0 0 1 1 0 1 0 0 0 0 1 1 0 0 1 1 0 1 1 0 1 1 1 1 1 1
## [36] 0 0 0 1 1 1 1 1 0 0 1 0 1 1 1 1 0 1 0 1 0 0 1 1 1
## Levels: 0 1
##
## $posterior
## 0 1
## 12 0.0020631821 0.997936818
## 62 0.0171496338 0.982850366
## 60 0.9409519836 0.059048016
## 61 0.9978378218 0.002162178
## 83 0.8257367079 0.174263292
## 97 0.0132097986 0.986790201
## 1 0.0202851198 0.979714880
## 22 0.9931135752 0.006886425
## 99 0.0090648071 0.990935193
## 47 0.9963349803 0.003665020
## 63 0.7029225059 0.297077494
## 49 0.2684102553 0.731589745
## 25 0.0200973495 0.979902651
## 81 0.7634619092 0.236538091
## 26 0.0008732469 0.999126753
## 72 0.8999006799 0.100099320
## 88 0.7758347813 0.224165219
## 23 0.9963551846 0.003644815
## 16 0.9978378218 0.002162178
## 19 0.0005543808 0.999445619
## 86 0.0209592984 0.979040702
## 24 0.6529371394 0.347062861
## 13 0.8773135417 0.122686458
## 4 0.0156398314 0.984360169
## 17 0.0541465108 0.945853489
## 95 0.6505501736 0.349449826
## 39 0.0035239300 0.996476070
## 67 0.0541465108 0.945853489
## 98 0.9981490629 0.001850937
## 77 0.0142648819 0.985735118
## 32 0.3614116712 0.638588329
## 87 0.0024922587 0.997507741
## 21 0.0032501705 0.996749830
## 34 0.0033395093 0.996660491
## 100 0.0008299149 0.999170085
## 50 0.7840604754 0.215939525
## 78 0.5765273540 0.423472646
## 76 0.7634619092 0.236538091
## 92 0.0154899021 0.984510098
## 65 0.0907915801 0.909208420
## 73 0.0141630051 0.985836995
## 74 0.2684102553 0.731589745
## 69 0.0209592984 0.979040702
## 36 0.9981490629 0.001850937
## 58 0.8040505506 0.195949449
## 28 0.0185130501 0.981486950
## 37 0.8256934385 0.174306562
## 80 0.0082415369 0.991758463
## 64 0.3822178486 0.617782151
## 40 0.0020631821 0.997936818
## 71 0.0019568376 0.998043162
## 82 0.8223181235 0.177681876
## 35 0.0012850160 0.998714984
## 79 0.8706134751 0.129386525
## 8 0.0006011474 0.999398853
## 96 0.8654775820 0.134522418
## 93 0.9971292963 0.002870704
## 33 0.0171362045 0.982863795
## 46 0.0240499054 0.975950095
## 48 0.0004332342 0.999566766
##
## $x
## LD1
## 12 1.8166904
## 62 0.9975970
## 60 -1.6202562
## 61 -2.9128324
## 83 -1.1545077
## 97 1.0993686
## 1 0.9318893
## 22 -2.4661507
## 99 1.2455836
## 47 -2.7096010
## 63 -0.8878250
## 49 -0.1720336
## 25 0.9355341
## 81 -1.0070625
## 26 2.1473208
## 72 -1.4004372
## 88 -1.0338678
## 23 -2.7117317
## 16 -2.9128324
## 19 2.3219279
## 86 0.9190696
## 24 -0.7997781
## 13 -1.3125399
## 4 1.0335759
## 17 0.5413516
## 95 -0.7957396
## 39 1.6105530
## 67 0.5413516
## 98 -2.9726377
## 77 1.0694493
## 32 -0.3384900
## 87 1.7439691
## 21 1.6417137
## 34 1.6312661
## 100 2.1668821
## 50 -1.0522743
## 78 -0.6755712
## 76 -1.0070625
## 92 1.0373335
## 65 0.3276897
## 73 1.0722413
## 74 -0.1720336
## 69 0.9190696
## 36 -2.9726377
## 58 -1.0992464
## 28 0.9676870
## 37 -1.1543922
## 80 1.2824658
## 64 -0.3727049
## 40 1.8166904
## 71 1.8370534
## 82 -1.1454541
## 35 1.9988135
## 79 -1.2891768
## 8 2.2908090
## 96 -1.2719563
## 93 -2.8037134
## 33 0.9979031
## 46 0.8650344
## 48 2.4166628
ldapred$class #classification of your trained model
## [1] 1 1 0 0 0 1 1 0 1 0 0 1 1 0 1 0 0 0 0 1 1 0 0 1 1 0 1 1 0 1 1 1 1 1 1
## [36] 0 0 0 1 1 1 1 1 0 0 1 0 1 1 1 1 0 1 0 1 0 0 1 1 1
## Levels: 0 1
confusionMatrix(ldapred$class,train$x4)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 22 3
## 1 0 35
##
## Accuracy : 0.95
## 95% CI : (0.8608, 0.9896)
## No Information Rate : 0.6333
## P-Value [Acc > NIR] : 9.108e-09
##
## Kappa : 0.8953
## Mcnemar's Test P-Value : 0.2482
##
## Sensitivity : 1.0000
## Specificity : 0.9211
## Pos Pred Value : 0.8800
## Neg Pred Value : 1.0000
## Prevalence : 0.3667
## Detection Rate : 0.3667
## Detection Prevalence : 0.4167
## Balanced Accuracy : 0.9605
##
## 'Positive' Class : 0
##
#The trained LDA model includes independent variables - x13,x17,x11,x12,x6,x7
#Accuracy = 95%
#Misclassifications = 3
#Test the model on the hold-out sample:
#the 40 randomly sampled test observations.
LDA_predictions = predict(LDAmodel1, newdata = test)
LDA_predictions
## $class
## [1] 0 0 0 1 1 1 1 0 0 1 1 1 0 0 1 0 0 1 0 0 1 0 1 0 0 0 1 0 1 0 1 1 1 1 1
## [36] 0 0 1 0 0
## Levels: 0 1
##
## $posterior
## 0 1
## 2 0.9968406813 0.003159319
## 3 0.8661615613 0.133838439
## 5 0.8256934385 0.174306562
## 6 0.0152866325 0.984713368
## 7 0.0002401579 0.999759842
## 9 0.0006500176 0.999349982
## 10 0.0216180019 0.978381998
## 11 0.8223181235 0.177681876
## 14 0.9971292963 0.002870704
## 15 0.0279916765 0.972008324
## 18 0.0240499054 0.975950095
## 20 0.0019568376 0.998043162
## 27 0.9965140364 0.003485964
## 29 0.9965140364 0.003485964
## 30 0.0217737680 0.978226232
## 31 0.9023149032 0.097685097
## 38 0.8679249397 0.132075060
## 41 0.0021669634 0.997833037
## 42 0.5765273540 0.423472646
## 43 0.7345047078 0.265495292
## 44 0.0009352668 0.999064733
## 45 0.9949095148 0.005090485
## 51 0.0961218086 0.903878191
## 52 0.9968406813 0.003159319
## 53 0.6432731371 0.356726863
## 54 0.9023149032 0.097685097
## 55 0.0033395093 0.996660491
## 56 0.6505501736 0.349449826
## 57 0.0157169173 0.984283083
## 59 0.9963349803 0.003665020
## 66 0.0004332342 0.999566766
## 68 0.0430291795 0.956970821
## 70 0.0008732469 0.999126753
## 75 0.0202851198 0.979714880
## 84 0.0006655142 0.999334486
## 85 0.7758347813 0.224165219
## 89 0.5682531626 0.431746837
## 90 0.0009576500 0.999042350
## 91 0.6458633135 0.354136687
## 94 0.8706134751 0.129386525
##
## $x
## LD1
## 2 -2.7668137
## 3 -1.2742171
## 5 -1.1543922
## 6 1.0424854
## 7 2.6433001
## 9 2.2607757
## 10 0.9069281
## 11 -1.1454541
## 14 -2.8037134
## 15 0.8051956
## 18 0.8650344
## 20 1.8370534
## 27 -2.7289052
## 29 -2.7289052
## 30 0.9041099
## 31 -1.4108414
## 38 -1.2800914
## 41 1.7978039
## 42 -0.6755712
## 43 -0.9478645
## 44 2.1209482
## 45 -2.5828864
## 51 0.3035237
## 52 -2.7668137
## 53 -0.7835050
## 54 -1.4108414
## 55 1.6312661
## 56 -0.7957396
## 57 1.0316577
## 59 -2.7096010
## 66 2.4166628
## 68 0.6340918
## 70 2.1473208
## 75 0.9318893
## 84 2.2517221
## 85 -1.0338678
## 89 -0.6625890
## 90 2.1118574
## 91 -0.7878467
## 94 -1.2891768
LDA_predictions$class #hold-out classes predicted by the trained LDA model
## [1] 0 0 0 1 1 1 1 0 0 1 1 1 0 0 1 0 0 1 0 0 1 0 1 0 0 0 1 0 1 0 1 1 1 1 1
## [36] 0 0 1 0 0
## Levels: 0 1
confusionMatrix(LDA_predictions$class,test$x4)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 17 4
## 1 0 19
##
## Accuracy : 0.9
## 95% CI : (0.7634, 0.9721)
## No Information Rate : 0.575
## P-Value [Acc > NIR] : 7.728e-06
##
## Kappa : 0.8015
## Mcnemar's Test P-Value : 0.1336
##
## Sensitivity : 1.0000
## Specificity : 0.8261
## Pos Pred Value : 0.8095
## Neg Pred Value : 1.0000
## Prevalence : 0.4250
## Detection Rate : 0.4250
## Detection Prevalence : 0.5250
## Balanced Accuracy : 0.9130
##
## 'Positive' Class : 0
##
#RESULT: 90% accuracy on the hold-out sample, with 4 misclassifications.
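#A further check worth running (not part of the original analysis): MASS
#can produce leave-one-out cross-validated class assignments via CV = TRUE,
#giving a less optimistic accuracy estimate than resubstitution.
LDA_loocv <- lda(x4 ~ x13 + x17 + x11 + x12 + x6 + x7, data = train, CV = TRUE)
mean(LDA_loocv$class == train$x4) #LOO estimate of classification accuracy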
#CONCLUSION -
#1) Differences are concentrated in a subset of six perceptions, allowing the company to focus on key variables rather than the entire set:
#   x13 (Competitive Pricing), x17 (Price Flexibility), x11 (Product Line), x7 (E-Commerce Activities), x6 (Product Quality), x12 (Salesforce Image)
#2) Firms in the USA have a better perception of HBAT on Product Quality and Product Line,
#   while firms outside the USA perceive it more favourably on Competitive Pricing, Price Flexibility, Salesforce Image and E-Commerce Activities.
#3) Training gave 95% accuracy and testing gave 90% accuracy.