Data Exploration
data <- read_csv("Factor-Hair-Revised.csv")
dim(data)
## [1] 100 13
str(data)
## spc_tbl_ [100 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ ID : num [1:100] 1 2 3 4 5 6 7 8 9 10 ...
## $ ProdQual : num [1:100] 8.5 8.2 9.2 6.4 9 6.5 6.9 6.2 5.8 6.4 ...
## $ Ecom : num [1:100] 3.9 2.7 3.4 3.3 3.4 2.8 3.7 3.3 3.6 4.5 ...
## $ TechSup : num [1:100] 2.5 5.1 5.6 7 5.2 3.1 5 3.9 5.1 5.1 ...
## $ CompRes : num [1:100] 5.9 7.2 5.6 3.7 4.6 4.1 2.6 4.8 6.7 6.1 ...
## $ Advertising : num [1:100] 4.8 3.4 5.4 4.7 2.2 4 2.1 4.6 3.7 4.7 ...
## $ ProdLine : num [1:100] 4.9 7.9 7.4 4.7 6 4.3 2.3 3.6 5.9 5.7 ...
## $ SalesFImage : num [1:100] 6 3.1 5.8 4.5 4.5 3.7 5.4 5.1 5.8 5.7 ...
## $ ComPricing : num [1:100] 6.8 5.3 4.5 8.8 6.8 8.5 8.9 6.9 9.3 8.4 ...
## $ WartyClaim : num [1:100] 4.7 5.5 6.2 7 6.1 5.1 4.8 5.4 5.9 5.4 ...
## $ OrdBilling : num [1:100] 5 3.9 5.4 4.3 4.5 3.6 2.1 4.3 4.4 4.1 ...
## $ DelSpeed : num [1:100] 3.7 4.9 4.5 3 3.5 3.3 2 3.7 4.6 4.4 ...
## $ Satisfaction: num [1:100] 8.2 5.7 8.9 4.8 7.1 4.7 5.7 6.3 7 5.5 ...
## - attr(*, "spec")=
## .. cols(
## .. ID = col_double(),
## .. ProdQual = col_double(),
## .. Ecom = col_double(),
## .. TechSup = col_double(),
## .. CompRes = col_double(),
## .. Advertising = col_double(),
## .. ProdLine = col_double(),
## .. SalesFImage = col_double(),
## .. ComPricing = col_double(),
## .. WartyClaim = col_double(),
## .. OrdBilling = col_double(),
## .. DelSpeed = col_double(),
## .. Satisfaction = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
names(data)
## [1] "ID" "ProdQual" "Ecom" "TechSup" "CompRes"
## [6] "Advertising" "ProdLine" "SalesFImage" "ComPricing" "WartyClaim"
## [11] "OrdBilling" "DelSpeed" "Satisfaction"
describe(data)
## data
##
## 13 Variables 100 Observations
## --------------------------------------------------------------------------------
## ID
## n missing distinct Info Mean Gmd .05 .10
## 100 0 100 1 50.5 33.67 5.95 10.90
## .25 .50 .75 .90 .95
## 25.75 50.50 75.25 90.10 95.05
##
## lowest : 1 2 3 4 5, highest: 96 97 98 99 100
## --------------------------------------------------------------------------------
## ProdQual
## n missing distinct Info Mean Gmd .05 .10
## 100 0 43 0.999 7.81 1.61 5.595 5.790
## .25 .50 .75 .90 .95
## 6.575 8.000 9.100 9.410 9.900
##
## lowest : 5 5.1 5.2 5.5 5.6, highest: 9.4 9.5 9.6 9.9 10
## --------------------------------------------------------------------------------
## Ecom
## n missing distinct Info Mean Gmd .05 .10
## 100 0 27 0.996 3.672 0.7674 2.595 2.800
## .25 .50 .75 .90 .95
## 3.275 3.600 3.925 4.530 5.100
##
## lowest : 2.2 2.4 2.5 2.6 2.7, highest: 4.9 5.1 5.5 5.6 5.7
## --------------------------------------------------------------------------------
## TechSup
## n missing distinct Info Mean Gmd .05 .10
## 100 0 50 0.999 5.365 1.755 2.700 3.280
## .25 .50 .75 .90 .95
## 4.250 5.400 6.625 7.210 7.605
##
## lowest : 1.3 2.5 2.6 2.7 3 , highest: 7.7 7.9 8 8.4 8.5
## --------------------------------------------------------------------------------
## CompRes
## n missing distinct Info Mean Gmd .05 .10
## 100 0 45 0.999 5.442 1.388 3.595 3.900
## .25 .50 .75 .90 .95
## 4.600 5.450 6.325 7.010 7.305
##
## lowest : 2.6 3 3.2 3.5 3.6, highest: 7.4 7.5 7.6 7.7 7.8
## --------------------------------------------------------------------------------
## Advertising
## n missing distinct Info Mean Gmd .05 .10
## 100 0 41 0.999 4.01 1.302 2.200 2.400
## .25 .50 .75 .90 .95
## 3.175 4.000 4.800 5.510 5.800
##
## lowest : 1.9 2.1 2.2 2.3 2.4, highest: 5.7 5.8 5.9 6.3 6.5
## --------------------------------------------------------------------------------
## ProdLine
## n missing distinct Info Mean Gmd .05 .10
## 100 0 42 0.999 5.805 1.509 3.900 4.190
## .25 .50 .75 .90 .95
## 4.700 5.750 6.800 7.600 7.805
##
## lowest : 2.3 2.9 3.3 3.6 3.9, highest: 7.7 7.8 7.9 8.3 8.4
## --------------------------------------------------------------------------------
## SalesFImage
## n missing distinct Info Mean Gmd .05 .10
## 100 0 35 0.997 5.123 1.19 3.385 3.790
## .25 .50 .75 .90 .95
## 4.500 4.900 5.800 6.610 7.100
##
## lowest : 2.9 3 3.1 3.4 3.5, highest: 6.8 6.9 7.1 7.8 8.2
## --------------------------------------------------------------------------------
## ComPricing
## n missing distinct Info Mean Gmd .05 .10
## 100 0 45 0.998 6.974 1.778 4.500 4.800
## .25 .50 .75 .90 .95
## 5.875 7.100 8.400 8.810 9.105
##
## lowest : 3.7 3.8 4.4 4.5 4.6, highest: 9.2 9.3 9.6 9.7 9.9
## --------------------------------------------------------------------------------
## WartyClaim
## n missing distinct Info Mean Gmd .05 .10
## 100 0 34 0.998 6.043 0.9372 4.795 5.000
## .25 .50 .75 .90 .95
## 5.400 6.100 6.600 7.200 7.305
##
## lowest : 4.1 4.3 4.5 4.7 4.8, highest: 7.3 7.4 7.5 7.7 8.1
## --------------------------------------------------------------------------------
## OrdBilling
## n missing distinct Info Mean Gmd .05 .10
## 100 0 37 0.998 4.278 1.033 2.595 3.000
## .25 .50 .75 .90 .95
## 3.700 4.400 4.800 5.400 5.605
##
## lowest : 2 2.1 2.4 2.5 2.6, highest: 5.5 5.6 5.7 6.5 6.7
## --------------------------------------------------------------------------------
## DelSpeed
## n missing distinct Info Mean Gmd .05 .10
## 100 0 30 0.997 3.886 0.8267 2.595 2.990
## .25 .50 .75 .90 .95
## 3.400 3.900 4.425 4.710 4.900
##
## lowest : 1.6 2 2.4 2.5 2.6, highest: 4.7 4.8 4.9 5.2 5.5
## --------------------------------------------------------------------------------
## Satisfaction
## n missing distinct Info Mean Gmd .05 .10
## 100 0 29 0.997 6.918 1.371 5.190 5.400
## .25 .50 .75 .90 .95
## 6.000 7.050 7.625 8.600 8.900
##
## lowest : 4.7 4.8 5 5.2 5.4, highest: 8.6 8.7 8.9 9 9.9
## --------------------------------------------------------------------------------
data_X <- subset(data, select=-c(ID))
Correlation Analysis
Inter-Item Correlation Matrix
datamatrix <- data_X %>%
dplyr::select(-Satisfaction) %>%
cor() # from library corrplot
corrplot(datamatrix, method="number")

corrplot(datamatrix, order="hclust", type='upper',tl.srt = 45)

Correlation Coefficient
coeff <- pcor(data_X[-12], method="pearson") # from library ppcor
# 1. To Extract the correlation coefficients
coeff$estimate
## ProdQual Ecom TechSup CompRes Advertising
## ProdQual 1.00000000 -0.06137387 0.04526368 0.06182758 0.10718506
## Ecom -0.06137387 1.00000000 0.06805570 -0.09741963 0.01546781
## TechSup 0.04526368 0.06805570 1.00000000 0.15566994 -0.06193553
## CompRes 0.06182758 -0.09741963 0.15566994 1.00000000 -0.07373805
## Advertising 0.10718506 0.01546781 -0.06193553 -0.07373805 1.00000000
## ProdLine 0.50256298 0.10050431 -0.11741341 0.05359792 -0.14272350
## SalesFImage 0.04162563 0.72474230 -0.07590728 0.12393577 0.31079614
## ComPricing -0.08486137 0.04664698 -0.13853368 -0.01994195 -0.05965306
## WartyClaim -0.12211328 -0.09991399 0.78713506 -0.12737815 0.03173627
## OrdBilling 0.18447638 0.11302141 -0.15973237 0.32236273 -0.03983344
## DelSpeed -0.35476917 -0.04045236 -0.01707376 0.55487929 0.20164019
## ProdLine SalesFImage ComPricing WartyClaim OrdBilling
## ProdQual 0.50256298 0.04162563 -0.08486137 -0.12211328 0.18447638
## Ecom 0.10050431 0.72474230 0.04664698 -0.09991399 0.11302141
## TechSup -0.11741341 -0.07590728 -0.13853368 0.78713506 -0.15973237
## CompRes 0.05359792 0.12393577 -0.01994195 -0.12737815 0.32236273
## Advertising -0.14272350 0.31079614 -0.05965306 0.03173627 -0.03983344
## ProdLine 1.00000000 -0.14787285 -0.38577264 0.24605237 -0.26098863
## SalesFImage -0.14787285 1.00000000 0.09204079 0.17477777 -0.11325620
## ComPricing -0.38577264 0.09204079 1.00000000 0.02832801 -0.10102366
## WartyClaim 0.24605237 0.17477777 0.02832801 1.00000000 0.25041217
## OrdBilling -0.26098863 -0.11325620 -0.10102366 0.25041217 1.00000000
## DelSpeed 0.52936161 0.08692144 0.18404681 -0.10038822 0.36943703
## DelSpeed
## ProdQual -0.35476917
## Ecom -0.04045236
## TechSup -0.01707376
## CompRes 0.55487929
## Advertising 0.20164019
## ProdLine 0.52936161
## SalesFImage 0.08692144
## ComPricing 0.18404681
## WartyClaim -0.10038822
## OrdBilling 0.36943703
## DelSpeed 1.00000000
# 2. To Extract p-values
coeff$p.value
## ProdQual Ecom TechSup CompRes Advertising
## ProdQual 0.000000e+00 5.633173e-01 6.700805e-01 5.604309e-01 0.311890776
## Ecom 5.633173e-01 0.000000e+00 5.215362e-01 3.582709e-01 0.884298394
## TechSup 6.700805e-01 5.215362e-01 0.000000e+00 1.406252e-01 0.559745101
## CompRes 5.604309e-01 3.582709e-01 1.406252e-01 0.000000e+00 0.487281744
## Advertising 3.118908e-01 8.842984e-01 5.597451e-01 4.872817e-01 0.000000000
## ProdLine 3.851133e-07 3.431808e-01 2.676856e-01 6.138493e-01 0.177146062
## SalesFImage 6.952327e-01 4.615957e-16 4.745275e-01 2.418211e-01 0.002713553
## ComPricing 4.238387e-01 6.606097e-01 1.903370e-01 8.511723e-01 0.574328622
## WartyClaim 2.488675e-01 3.460372e-01 2.232181e-20 2.288909e-01 0.765218696
## OrdBilling 8.002810e-02 2.861193e-01 1.304285e-01 1.831537e-03 0.707747645
## DelSpeed 5.596498e-04 7.034167e-01 8.723821e-01 1.146684e-08 0.055279741
## ProdLine SalesFImage ComPricing WartyClaim OrdBilling
## ProdQual 3.851133e-07 6.952327e-01 0.4238387206 2.488675e-01 0.0800281045
## Ecom 3.431808e-01 4.615957e-16 0.6606096903 3.460372e-01 0.2861193395
## TechSup 2.676856e-01 4.745275e-01 0.1903369540 2.232181e-20 0.1304284879
## CompRes 6.138493e-01 2.418211e-01 0.8511722571 2.288909e-01 0.0018315375
## Advertising 1.771461e-01 2.713553e-03 0.5743286224 7.652187e-01 0.7077476451
## ProdLine 0.000000e+00 1.618670e-01 0.0001590844 1.872256e-02 0.0124641303
## SalesFImage 1.618670e-01 0.000000e+00 0.3855479040 9.751825e-02 0.2851130484
## ComPricing 1.590844e-04 3.855479e-01 0.0000000000 7.898158e-01 0.3406799281
## WartyClaim 1.872256e-02 9.751825e-02 0.7898158257 0.000000e+00 0.0166642142
## OrdBilling 1.246413e-02 2.851130e-01 0.3406799281 1.666421e-02 0.0000000000
## DelSpeed 6.855711e-08 4.126338e-01 0.0807457060 3.437413e-01 0.0003135242
## DelSpeed
## ProdQual 5.596498e-04
## Ecom 7.034167e-01
## TechSup 8.723821e-01
## CompRes 1.146684e-08
## Advertising 5.527974e-02
## ProdLine 6.855711e-08
## SalesFImage 4.126338e-01
## ComPricing 8.074571e-02
## WartyClaim 3.437413e-01
## OrdBilling 3.135242e-04
## DelSpeed 0.000000e+00
corrplot(coeff$estimate, type="upper", order="hclust",
p.mat = coeff$p.value, sig.level = 0.01, insig = "blank")

VIF
model <- lm(Satisfaction ~., data = data_X)
vif(model)
## ProdQual Ecom TechSup CompRes Advertising ProdLine
## 1.635797 2.756694 2.976796 4.730448 1.508933 3.488185
## SalesFImage ComPricing WartyClaim OrdBilling DelSpeed
## 3.439420 1.635000 3.198337 2.902999 6.516014
Factor Analysis Test KMO
data_fa <- data_X[,-12]
datamatrix <- cor(data_fa)
KMO(r=datamatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = datamatrix)
## Overall MSA = 0.65
## MSA for each item =
## ProdQual Ecom TechSup CompRes Advertising ProdLine
## 0.51 0.63 0.52 0.79 0.78 0.62
## SalesFImage ComPricing WartyClaim OrdBilling DelSpeed
## 0.62 0.75 0.51 0.76 0.67
Bartlett Test
cortest.bartlett(datamatrix, nrow(data_X))
## $chisq
## [1] 619.2726
##
## $p.value
## [1] 1.79337e-96
##
## $df
## [1] 55
Factor Analysis
Scree plot
ev <- eigen(cor(data_fa))
ev$values
## [1] 3.42697133 2.55089671 1.69097648 1.08655606 0.60942409 0.55188378
## [7] 0.40151815 0.24695154 0.20355327 0.13284158 0.09842702
#Plot a Scree plot using base plot:
Factor = c(1,2,3,4,5,6,7,8,9,10,11)
Eigen_Values <-ev$values
Scree <- data.frame(Factor, Eigen_Values)
plot(Scree, main = "Scree Plot", col= "Blue",ylim=c(0,4))
lines(Scree,col='Red')
abline(h = 1, col="Green")

Varimax Rotation
nfactors <- 4
fit1 <-factanal(data_fa,nfactors,scores = c("regression"),rotation = "varimax")
print(fit1)
##
## Call:
## factanal(x = data_fa, factors = nfactors, scores = c("regression"), rotation = "varimax")
##
## Uniquenesses:
## ProdQual Ecom TechSup CompRes Advertising ProdLine
## 0.682 0.360 0.228 0.178 0.679 0.005
## SalesFImage ComPricing WartyClaim OrdBilling DelSpeed
## 0.017 0.636 0.163 0.347 0.076
##
## Loadings:
## Factor1 Factor2 Factor3 Factor4
## ProdQual 0.557
## Ecom 0.793
## TechSup 0.872 0.102
## CompRes 0.884 0.142 0.135
## Advertising 0.190 0.521 -0.110
## ProdLine 0.502 0.104 0.856
## SalesFImage 0.119 0.974 -0.130
## ComPricing 0.225 -0.216 -0.514
## WartyClaim 0.894 0.158
## OrdBilling 0.794 0.101 0.105
## DelSpeed 0.928 0.189 0.164
##
## Factor1 Factor2 Factor3 Factor4
## SS loadings 2.592 1.977 1.638 1.423
## Proportion Var 0.236 0.180 0.149 0.129
## Cumulative Var 0.236 0.415 0.564 0.694
##
## Test of the hypothesis that 4 factors are sufficient.
## The chi square statistic is 24.26 on 17 degrees of freedom.
## The p-value is 0.113
Diagram
fa_var <- fa(r=data_fa, nfactors = 4, rotate="varimax",fm="pa")
fa.diagram(fa_var)

head(fa_var$scores)
## PA1 PA2 PA3 PA4
## [1,] -0.1338871 0.9175166 -1.719604873 0.09135411
## [2,] 1.6297604 -2.0090053 -0.596361722 0.65808192
## [3,] 0.3637658 0.8361736 0.002979966 1.37548765
## [4,] -1.2225230 -0.5491336 1.245473305 -0.64421384
## [5,] -0.4854209 -0.4276223 -0.026980304 0.47360747
## [6,] -0.5950924 -1.3035333 -1.183019401 -0.95913571
Regression
regdata <- cbind(data_X[12], fa_var$scores)
#Labeling the data
names(regdata) <- c("Satisfaction", "Purchase", "Marketing",
"Post_purchase", "Prod_positioning")
head(regdata)
## Satisfaction Purchase Marketing Post_purchase Prod_positioning
## 1 8.2 -0.1338871 0.9175166 -1.719604873 0.09135411
## 2 5.7 1.6297604 -2.0090053 -0.596361722 0.65808192
## 3 8.9 0.3637658 0.8361736 0.002979966 1.37548765
## 4 4.8 -1.2225230 -0.5491336 1.245473305 -0.64421384
## 5 7.1 -0.4854209 -0.4276223 -0.026980304 0.47360747
## 6 4.7 -0.5950924 -1.3035333 -1.183019401 -0.95913571
set.seed(100)
indices= sample(1:nrow(regdata), 0.7*nrow(regdata))
train=regdata[indices,]
test = regdata[-indices,]
#Regression Model using train data
model1 = lm(Satisfaction~., train)
summary(model1)
##
## Call:
## lm(formula = Satisfaction ~ ., data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.76280 -0.48717 0.06799 0.46459 1.24022
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.91852 0.08068 85.757 < 2e-16 ***
## Purchase 0.50230 0.07641 6.574 9.74e-09 ***
## Marketing 0.75488 0.08390 8.998 5.00e-13 ***
## Post_purchase 0.08755 0.08216 1.066 0.291
## Prod_positioning 0.58074 0.08781 6.614 8.30e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6629 on 65 degrees of freedom
## Multiple R-squared: 0.7261, Adjusted R-squared: 0.7093
## F-statistic: 43.08 on 4 and 65 DF, p-value: < 2.2e-16
Testing VIF
print(vif(model1))
## Purchase Marketing Post_purchase Prod_positioning
## 1.009648 1.008235 1.015126 1.024050