Data Exploration
data <- read.csv("Factor-Hair-Revised.csv")
dim(data)
## [1] 100 13
str(data)
## 'data.frame': 100 obs. of 13 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ ProdQual : num 8.5 8.2 9.2 6.4 9 6.5 6.9 6.2 5.8 6.4 ...
## $ Ecom : num 3.9 2.7 3.4 3.3 3.4 2.8 3.7 3.3 3.6 4.5 ...
## $ TechSup : num 2.5 5.1 5.6 7 5.2 3.1 5 3.9 5.1 5.1 ...
## $ CompRes : num 5.9 7.2 5.6 3.7 4.6 4.1 2.6 4.8 6.7 6.1 ...
## $ Advertising : num 4.8 3.4 5.4 4.7 2.2 4 2.1 4.6 3.7 4.7 ...
## $ ProdLine : num 4.9 7.9 7.4 4.7 6 4.3 2.3 3.6 5.9 5.7 ...
## $ SalesFImage : num 6 3.1 5.8 4.5 4.5 3.7 5.4 5.1 5.8 5.7 ...
## $ ComPricing : num 6.8 5.3 4.5 8.8 6.8 8.5 8.9 6.9 9.3 8.4 ...
## $ WartyClaim : num 4.7 5.5 6.2 7 6.1 5.1 4.8 5.4 5.9 5.4 ...
## $ OrdBilling : num 5 3.9 5.4 4.3 4.5 3.6 2.1 4.3 4.4 4.1 ...
## $ DelSpeed : num 3.7 4.9 4.5 3 3.5 3.3 2 3.7 4.6 4.4 ...
## $ Satisfaction: num 8.2 5.7 8.9 4.8 7.1 4.7 5.7 6.3 7 5.5 ...
names(data)
## [1] "ID" "ProdQual" "Ecom" "TechSup" "CompRes"
## [6] "Advertising" "ProdLine" "SalesFImage" "ComPricing" "WartyClaim"
## [11] "OrdBilling" "DelSpeed" "Satisfaction"
data_X <- subset(data, select = -c(1))  # drop the ID column
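Before computing correlations it is worth confirming that the data contain no missing values; a quick check along these lines (not part of the original output) would be:
colSums(is.na(data_X))  # missing-value count per variable; all should be zero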
Correlation Analysis
library(corrplot)
## corrplot 0.95 loaded
datamatrix <- cor(data_X[-12])  # correlations among the 11 predictors (Satisfaction excluded)
corrplot(datamatrix, method = "number")
library(ppcor)
## Warning: package 'ppcor' was built under R version 4.5.1
## Loading required package: MASS
# partial correlations among the 11 predictors; cells not significant at the 1% level are left blank
coeff <- pcor(data_X[-12], method = "pearson")
corrplot(coeff$estimate, type = "upper", order = "hclust",
         p.mat = coeff$p.value, sig.level = 0.01, insig = "blank")
library(car)
## Loading required package: carData
model <- lm(Satisfaction ~ ., data = data_X)  # full linear model, fitted to check multicollinearity
vif(model)
## ProdQual Ecom TechSup CompRes Advertising ProdLine
## 1.635797 2.756694 2.976796 4.730448 1.508933 3.488185
## SalesFImage ComPricing WartyClaim OrdBilling DelSpeed
## 3.439420 1.635000 3.198337 2.902999 6.516014
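A common rule of thumb treats a VIF above 5 as a sign of appreciable multicollinearity; here only DelSpeed (6.52) crosses that threshold. One way to flag such predictors (an add-on check, not part of the original output):
vif_values <- vif(model)
vif_values[vif_values > 5]  # predictors exceeding the rule-of-thumb VIF threshold of 5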
library(psych)
## Warning: package 'psych' was built under R version 4.5.1
##
## Attaching package: 'psych'
## The following object is masked from 'package:car':
##
## logit
data_fa <- data_X[, -12]  # keep the 11 predictors; drop Satisfaction
datamatrix <- cor(data_fa)
KMO(r=datamatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = datamatrix)
## Overall MSA = 0.65
## MSA for each item =
## ProdQual Ecom TechSup CompRes Advertising ProdLine
## 0.51 0.63 0.52 0.79 0.78 0.62
## SalesFImage ComPricing WartyClaim OrdBilling DelSpeed
## 0.62 0.75 0.51 0.76 0.67
cortest.bartlett(datamatrix, nrow(data_X))
## $chisq
## [1] 619.2726
##
## $p.value
## [1] 1.79337e-96
##
## $df
## [1] 55
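The overall MSA of 0.65 is above the conventional 0.5 cut-off, and Bartlett's test firmly rejects the hypothesis that the correlation matrix is an identity matrix (chi-square = 619.27 on 55 df, p ≈ 1.8e-96), so the predictors are suitable for factor analysis.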
Factor Analysis
ev <- eigen(cor(data_fa))
ev$values
## [1] 3.42697133 2.55089671 1.69097648 1.08655606 0.60942409 0.55188378
## [7] 0.40151815 0.24695154 0.20355327 0.13284158 0.09842702
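Four eigenvalues exceed 1, so the Kaiser criterion points to retaining four factors. A one-line confirmation (not in the original output):
sum(ev$values > 1)  # number of factors retained under the Kaiser criterion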
library(psych)
Factor <- 1:11
Eigen_Values <- ev$values
scree <- data.frame(Factor, Eigen_Values)
plot(scree, main = "Scree Plot", col = "blue", ylim = c(0, 4))
lines(scree, col = "red")
abline(h = 1, col = "green")  # eigenvalue = 1 reference line (Kaiser criterion)
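As an optional cross-check that is not part of the original analysis, parallel analysis from the psych package can also be used to suggest the number of factors:
fa.parallel(data_fa, fm = "pa", fa = "fa")  # compares observed eigenvalues with those from random data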
# fit a four-factor model with varimax rotation and save regression factor scores
nfactors <- 4
fit1 <- factanal(data_fa, nfactors, scores = c("regression"), rotation = "varimax")
print(fit1)
##
## Call:
## factanal(x = data_fa, factors = nfactors, scores = c("regression"), rotation = "varimax")
##
## Uniquenesses:
## ProdQual Ecom TechSup CompRes Advertising ProdLine
## 0.682 0.360 0.228 0.178 0.679 0.005
## SalesFImage ComPricing WartyClaim OrdBilling DelSpeed
## 0.017 0.636 0.163 0.347 0.076
##
## Loadings:
## Factor1 Factor2 Factor3 Factor4
## ProdQual 0.557
## Ecom 0.793
## TechSup 0.872 0.102
## CompRes 0.884 0.142 0.135
## Advertising 0.190 0.521 -0.110
## ProdLine 0.502 0.104 0.856
## SalesFImage 0.119 0.974 -0.130
## ComPricing 0.225 -0.216 -0.514
## WartyClaim 0.894 0.158
## OrdBilling 0.794 0.101 0.105
## DelSpeed 0.928 0.189 0.164
##
## Factor1 Factor2 Factor3 Factor4
## SS loadings 2.592 1.977 1.638 1.423
## Proportion Var 0.236 0.180 0.149 0.129
## Cumulative Var 0.236 0.415 0.564 0.694
##
## Test of the hypothesis that 4 factors are sufficient.
## The chi square statistic is 24.26 on 17 degrees of freedom.
## The p-value is 0.113
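The four factors jointly explain 69.4% of the total variance, and the test of sufficiency is not rejected (p = 0.113), so a four-factor solution is an adequate summary of the 11 predictors.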
# principal-axis factoring with varimax rotation; scores will be used as regression inputs
fa_var <- fa(r = data_fa, nfactors = 4, rotate = "varimax", fm = "pa")
fa.diagram(fa_var)
head(fa_var$scores)
## PA1 PA2 PA3 PA4
## 1 -0.1338871 0.9175166 -1.719604873 0.09135411
## 2 1.6297604 -2.0090053 -0.596361722 0.65808192
## 3 0.3637658 0.8361736 0.002979966 1.37548765
## 4 -1.2225230 -0.5491336 1.245473305 -0.64421384
## 5 -0.4854209 -0.4276223 -0.026980304 0.47360747
## 6 -0.5950924 -1.3035333 -1.183019401 -0.95913571
Regression
# combine Satisfaction with the four factor scores and give the factors descriptive names
regdata <- cbind(data_X[12], fa_var$scores)
names(regdata) <- c("Satisfaction", "Purchase", "Marketing",
                    "Post_purchase", "Prod_positioning")
head(regdata)
## Satisfaction Purchase Marketing Post_purchase Prod_positioning
## 1 8.2 -0.1338871 0.9175166 -1.719604873 0.09135411
## 2 5.7 1.6297604 -2.0090053 -0.596361722 0.65808192
## 3 8.9 0.3637658 0.8361736 0.002979966 1.37548765
## 4 4.8 -1.2225230 -0.5491336 1.245473305 -0.64421384
## 5 7.1 -0.4854209 -0.4276223 -0.026980304 0.47360747
## 6 4.7 -0.5950924 -1.3035333 -1.183019401 -0.95913571
set.seed(100)
indices <- sample(1:nrow(regdata), 0.7 * nrow(regdata))
train <- regdata[indices, ]
test <- regdata[-indices, ]
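With set.seed(100) the 70/30 split is reproducible; a quick sanity check of the partition sizes (an add-on, not in the original output):
c(train = nrow(train), test = nrow(test))  # expect 70 and 30 rows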
model1 <- lm(Satisfaction ~ ., data = train)
summary(model1)
##
## Call:
## lm(formula = Satisfaction ~ ., data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.76280 -0.48717 0.06799 0.46459 1.24022
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.91852 0.08068 85.757 < 2e-16 ***
## Purchase 0.50230 0.07641 6.574 9.74e-09 ***
## Marketing 0.75488 0.08390 8.998 5.00e-13 ***
## Post_purchase 0.08755 0.08216 1.066 0.291
## Prod_positioning 0.58074 0.08781 6.614 8.30e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6629 on 65 degrees of freedom
## Multiple R-squared: 0.7261, Adjusted R-squared: 0.7093
## F-statistic: 43.08 on 4 and 65 DF, p-value: < 2.2e-16
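On the training data the model explains roughly 71% of the variance in Satisfaction (adjusted R-squared = 0.709). Purchase, Marketing, and Prod_positioning are highly significant, while Post_purchase is not (p = 0.291).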
print(vif(model1))
## Purchase Marketing Post_purchase Prod_positioning
## 1.009648 1.008235 1.015126 1.024050
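The VIFs are all close to 1, as expected for varimax-rotated factor scores, confirming that the multicollinearity seen among the raw predictors has been removed. Since Post_purchase is not significant, a possible follow-up (a sketch, not part of the original analysis) is to refit without it and compare fit:
model2 <- lm(Satisfaction ~ Purchase + Marketing + Prod_positioning, data = train)
summary(model2)  # compare adjusted R-squared with model1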
pred_test1 <- predict(model1, newdata = test, type = "response")  # predict Satisfaction for the 30 hold-out rows
pred_test1
## 6 8 11 13 17 19 26 33
## 4.975008 5.908267 6.951629 8.677431 6.613838 6.963113 6.313513 6.141338
## 34 35 37 40 42 44 49 50
## 6.158993 7.415742 6.589746 6.858206 7.133989 8.533080 8.765145 8.078744
## 53 56 57 60 65 67 71 73
## 7.395438 7.468360 8.744402 6.276660 5.936570 6.650322 8.299545 7.685564
## 75 80 96 97 99 100
## 7.330191 6.719528 7.540233 6.143172 8.084583 5.799897
test$Satisfaction_Predicted <- pred_test1
head(test[c(1, 6)], 10)  # actual vs. predicted Satisfaction for the first ten test rows
## Satisfaction Satisfaction_Predicted
## 6 4.7 4.975008
## 8 6.3 5.908267
## 11 7.4 6.951629
## 13 8.4 8.677431
## 17 6.4 6.613838
## 19 6.8 6.963113
## 26 6.6 6.313513
## 33 5.4 6.141338
## 34 7.3 6.158993
## 35 6.3 7.415742
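Beyond the side-by-side comparison above, the hold-out predictions can be summarised with test-set error metrics. A minimal sketch (these figures are not part of the original output):
actual <- test$Satisfaction
rmse <- sqrt(mean((actual - pred_test1)^2))   # root mean squared error
mae <- mean(abs(actual - pred_test1))         # mean absolute error
r2 <- 1 - sum((actual - pred_test1)^2) / sum((actual - mean(actual))^2)  # test-set R-squared
c(RMSE = rmse, MAE = mae, R2 = r2)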